// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test host codegen.
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK0
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK3

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s

// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK2
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK3

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY12 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

template <typename tx, typename ty> // Two-member aggregate; used in foo() as TT<long long, char> and TT<int, int>.
struct TT {
  tx X; // first member, of type tx
  ty Y; // second member, of type ty
};
#pragma omp declare target
int ga = 5; // global declared inside a declare-target region; listed in firstprivate(a, p, ga) in foo()
#pragma omp end declare target




int foo(int n, double *ptr) { // Three target regions with firstprivate scalars, arrays, VLAs, structs and pointers; returns a.
  int a = 0;
  short aa = 0;
  float b[10];
  float bn[n]; // variable-length array sized by n
  double c[5][10];
  double cn[5][n]; // variable-length in its second dimension
  TT<long long, char> d;
  const TT<int, int> e = {n, n};
  int *p __attribute__ ((aligned (64))) = &a;
  // NOTE: the generated assertions embed each target pragma's source line (e.g. _l63, _l70); keep line numbers unchanged.
#pragma omp target firstprivate(a, p, ga)
  {
  }

  // a is passed by value to __tgt_target_kernel


#pragma omp target firstprivate(aa, b, bn, c, cn, d)
  {
    aa += 1;
    b[2] = 1.0;
    bn[3] = 1.0;
    c[1][2] = 1.0;
    cn[1][3] = 1.0;
    d.X = 1;
    d.Y = 1;
  }


  // firstprivate(aa) --> base_ptr = aa, ptr = aa, size = 2 (short)

  // firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10)

  // firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8; (2) base_ptr = &bn[0], ptr = &bn[0], size = n*sizeof(float)

  // firstprivate(c): base_ptr = &c[0], ptr = &c[0], size = 400 (5*10*sizeof(double))

  // firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double)

  // firstprivate(d): base_ptr = &d, ptr = &d, size = 16


  // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
  // target region

  // firstprivate(aa): aa_priv = aa_in

  //  firstprivate(b): memcpy(b_priv,b_in)


  // firstprivate(bn)

  // firstprivate(c)

  // firstprivate(cn)

  // firstprivate(d)

#pragma omp target firstprivate(ptr, e)
  {
    ptr[0] = e.X; // writes through the privatized pointer value; e is a const struct
    ptr[0]++;
  }




  return a;
}

template <typename tx> // Template case: one target region with a firstprivate scalar and fixed-size array; n is unused.
tx ftemplate(int n) {
  tx a = 0;
  tx b[10];

#pragma omp target firstprivate(a, b)
  {
    a += 1; // a is firstprivate here, so this acts on the region's own copy
    b[2] += 1;
  }

  return a;
}

static int fstatic(int n) { // Internal-linkage case: firstprivate int, char and int[10] in one target region; n is unused.
  int a = 0;
  char aaa = 0;
  int b[10];

#pragma omp target firstprivate(a, aaa, b)
  {
    a += 1; // all three variables are firstprivate, so these act on the region's own copies
    aaa += 1;
    b[2] += 1;
  }

  return a;
}


// firstprivate(a): a_priv = a_in

// firstprivate(aaa)

// firstprivate(b)


struct S1 { // Member-function case: a target region that uses both this->a and firstprivate locals.
  double a;

  int r1(int n) {
    int b = n + 1;
    short int c[2][n]; // variable-length in its second dimension

#pragma omp target firstprivate(b, c)
    {
      this->a = (double)b + 1.5; // member access goes through this, which is not named in the firstprivate clause
      c[1][1] = ++a;
    }

    return c[1][1] + (int)b;
  }

  // on the host side, we first generate r1, then the static function and the template above

  // map(this): the this pointer is implicitly captured (not a firstprivate matter)

  // firstprivate(b): base_ptr = b, ptr = b, size = 4 (pass by-value)

  // firstprivate(c), 3 entries: 2, n, c

  // only check that we use the map types stored in the global variable



  // firstprivate(b)


  // firstprivate(c)

  // finish

  // static host function

  // firstprivate(a): by value

  // firstprivate(aaa): by value

  // firstprivate(b): base_ptr = &b[0], ptr= &b[0]

  // only check that the right sizes and map types are used
};

int bar(int n, double *ptr) { // Driver: sums the results of foo, S1::r1, fstatic and ftemplate<int>.
  int a = 0;
  a += foo(n, ptr);
  S1 S;
  a += S.r1(n);
  a += fstatic(n); // only visible use of the static function
  a += ftemplate<int>(n); // only visible instantiation of the template

  return a;
}

// template host and device


// firstprivate(a): by value

// firstprivate(b): pointer



// firstprivate(a)

// firstprivate(b)


#endif
// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK-64-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK-64-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-64-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK-64-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK-64-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// CHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK-64-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK-64-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK-64-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK-64-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK-64-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK-64-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK-64-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT:    store i8* null, i8** [[TMP17]], align 8
// CHECK-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK-64-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK-64-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT:    store i8* null, i8** [[TMP22]], align 8
// CHECK-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK-64-NEXT:    store i8* null, i8** [[TMP27]], align 8
// CHECK-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT:    store i32 2, i32* [[TMP30]], align 4
// CHECK-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT:    store i32 3, i32* [[TMP31]], align 4
// CHECK-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP36]], align 8
// CHECK-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP37]], align 8
// CHECK-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT:    store i64 0, i64* [[TMP38]], align 8
// CHECK-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT:    store i64 0, i64* [[TMP39]], align 8
// CHECK-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT:    store i32 0, i32* [[TMP42]], align 4
// CHECK-64-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK-64-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64:       omp_offload.failed:
// CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-64:       omp_offload.cont:
// CHECK-64-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK-64-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK-64-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK-64-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK-64-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK-64-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK-64-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK-64-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK-64-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK-64-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK-64-NEXT:    store i8* null, i8** [[TMP55]], align 8
// CHECK-64-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK-64-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK-64-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK-64-NEXT:    store i8* null, i8** [[TMP60]], align 8
// CHECK-64-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK-64-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK-64-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK-64-NEXT:    store i8* null, i8** [[TMP65]], align 8
// CHECK-64-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK-64-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK-64-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
// CHECK-64-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK-64-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK-64-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
// CHECK-64-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK-64-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK-64-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK-64-NEXT:    store i8* null, i8** [[TMP71]], align 8
// CHECK-64-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK-64-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK-64-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK-64-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK-64-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK-64-NEXT:    store i8* null, i8** [[TMP76]], align 8
// CHECK-64-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK-64-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK-64-NEXT:    store i64 5, i64* [[TMP78]], align 8
// CHECK-64-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK-64-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK-64-NEXT:    store i64 5, i64* [[TMP80]], align 8
// CHECK-64-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK-64-NEXT:    store i8* null, i8** [[TMP81]], align 8
// CHECK-64-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK-64-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK-64-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK-64-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK-64-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK-64-NEXT:    store i8* null, i8** [[TMP86]], align 8
// CHECK-64-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK-64-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK-64-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK-64-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK-64-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK-64-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK-64-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK-64-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK-64-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK-64-NEXT:    store i8* null, i8** [[TMP92]], align 8
// CHECK-64-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK-64-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK-64-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK-64-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK-64-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK-64-NEXT:    store i8* null, i8** [[TMP97]], align 8
// CHECK-64-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-64-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-64-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK-64-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK-64-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK-64-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK-64-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK-64-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK-64-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK-64-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK-64-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK-64-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK-64-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK-64-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP107]], align 8
// CHECK-64-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP108]], align 8
// CHECK-64-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK-64-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK-64-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK-64-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK-64-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK-64-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK-64-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK-64-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK-64-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK-64-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK-64-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK-64:       omp_offload.failed8:
// CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
// CHECK-64:       omp_offload.cont9:
// CHECK-64-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK-64-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK-64-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK-64-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK-64-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK-64-NEXT:    store i8* null, i8** [[TMP121]], align 8
// CHECK-64-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK-64-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK-64-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK-64-NEXT:    store i8* null, i8** [[TMP126]], align 8
// CHECK-64-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK-64-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-64-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK-64-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK-64-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK-64-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK-64-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK-64-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK-64-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK-64-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK-64-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK-64-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK-64-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP135]], align 8
// CHECK-64-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP136]], align 8
// CHECK-64-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK-64-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK-64-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK-64-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK-64-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK-64-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK-64-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK-64-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK-64-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK-64-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK-64-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK-64:       omp_offload.failed14:
// CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
// CHECK-64:       omp_offload.cont15:
// CHECK-64-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK-64-NEXT:    ret i32 [[TMP144]]
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK-64-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK-64-NEXT:    store i64 1, i64* [[X]], align 8
// CHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK-64-NEXT:    store i8 1, i8* [[Y]], align 8
// CHECK-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK-64-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK-64-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK-64-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK-64-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK-64-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK-64-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK-64-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK-64-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK-64-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK-64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK-64-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK-64-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK-64-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK-64-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    ret i32 [[TMP9]]
// CHECK-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK-64-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-64-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK-64-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK-64-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK-64-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK-64-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK-64-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK-64-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK-64-NEXT:    store double* [[A]], double** [[TMP13]], align 8
// CHECK-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT:    store i8* null, i8** [[TMP14]], align 8
// CHECK-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT:    store i8* null, i8** [[TMP19]], align 8
// CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK-64-NEXT:    store i64 2, i64* [[TMP21]], align 8
// CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK-64-NEXT:    store i64 2, i64* [[TMP23]], align 8
// CHECK-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK-64-NEXT:    store i8* null, i8** [[TMP24]], align 8
// CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK-64-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK-64-NEXT:    store i8* null, i8** [[TMP29]], align 8
// CHECK-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK-64-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK-64-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK-64-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK-64-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK-64-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK-64-NEXT:    store i8* null, i8** [[TMP35]], align 8
// CHECK-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK-64-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK-64-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK-64-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP45]], align 8
// CHECK-64-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP46]], align 8
// CHECK-64-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK-64-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK-64-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK-64-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK-64-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK-64-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64:       omp_offload.failed:
// CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-64:       omp_offload.cont:
// CHECK-64-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK-64-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK-64-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK-64-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK-64-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK-64-NEXT:    ret i32 [[ADD4]]
// CHECK-64-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK-64-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK-64-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK-64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT:    store i8* null, i8** [[TMP8]], align 8
// CHECK-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT:    store i8* null, i8** [[TMP13]], align 8
// CHECK-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-64-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK-64-NEXT:    store i8* null, i8** [[TMP18]], align 8
// CHECK-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP27]], align 8
// CHECK-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP28]], align 8
// CHECK-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK-64-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK-64-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64:       omp_offload.failed:
// CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-64:       omp_offload.cont:
// CHECK-64-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    ret i32 [[TMP36]]
// CHECK-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK-64-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK-64-NEXT:    store i8* null, i8** [[TMP6]], align 8
// CHECK-64-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK-64-NEXT:    store i8* null, i8** [[TMP11]], align 8
// CHECK-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-64-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-64-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-64-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-64-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP20]], align 8
// CHECK-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-64-NEXT:    store i8** null, i8*** [[TMP21]], align 8
// CHECK-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-64-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-64-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-64-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK-64-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-64-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-64-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-64:       omp_offload.failed:
// CHECK-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-64:       omp_offload.cont:
// CHECK-64-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK-64-NEXT:    ret i32 [[TMP29]]
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-64-NEXT:    store double [[ADD]], double* [[A]], align 8
// CHECK-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK-64-NEXT:    store double [[INC]], double* [[A5]], align 8
// CHECK-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK-64-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK-64-NEXT:    ret void
// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-64-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-64-NEXT:  entry:
// CHECK-64-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK-64-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK-32-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK-32-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-32-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK-32-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// CHECK-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK-32-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK-32-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK-32-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT:    store i8* null, i8** [[TMP15]], align 4
// CHECK-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK-32-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK-32-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT:    store i8* null, i8** [[TMP20]], align 4
// CHECK-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8* null, i8** [[TMP25]], align 4
// CHECK-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT:    store i32 2, i32* [[TMP28]], align 4
// CHECK-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT:    store i32 3, i32* [[TMP29]], align 4
// CHECK-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP34]], align 4
// CHECK-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP35]], align 4
// CHECK-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT:    store i64 0, i64* [[TMP36]], align 8
// CHECK-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT:    store i64 0, i64* [[TMP37]], align 8
// CHECK-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT:    store i32 0, i32* [[TMP40]], align 4
// CHECK-32-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK-32-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32:       omp_offload.failed:
// CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-32:       omp_offload.cont:
// CHECK-32-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK-32-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK-32-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK-32-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK-32-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK-32-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK-32-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK-32-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK-32-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK-32-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK-32-NEXT:    store i8* null, i8** [[TMP55]], align 4
// CHECK-32-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK-32-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK-32-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK-32-NEXT:    store i8* null, i8** [[TMP60]], align 4
// CHECK-32-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK-32-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK-32-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK-32-NEXT:    store i8* null, i8** [[TMP65]], align 4
// CHECK-32-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK-32-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK-32-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
// CHECK-32-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK-32-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK-32-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
// CHECK-32-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK-32-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK-32-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK-32-NEXT:    store i8* null, i8** [[TMP71]], align 4
// CHECK-32-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK-32-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK-32-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK-32-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK-32-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK-32-NEXT:    store i8* null, i8** [[TMP76]], align 4
// CHECK-32-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK-32-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK-32-NEXT:    store i32 5, i32* [[TMP78]], align 4
// CHECK-32-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK-32-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK-32-NEXT:    store i32 5, i32* [[TMP80]], align 4
// CHECK-32-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK-32-NEXT:    store i8* null, i8** [[TMP81]], align 4
// CHECK-32-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK-32-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK-32-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK-32-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK-32-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK-32-NEXT:    store i8* null, i8** [[TMP86]], align 4
// CHECK-32-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK-32-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK-32-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK-32-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK-32-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK-32-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK-32-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK-32-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK-32-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK-32-NEXT:    store i8* null, i8** [[TMP92]], align 4
// CHECK-32-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK-32-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK-32-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK-32-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK-32-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK-32-NEXT:    store i8* null, i8** [[TMP97]], align 4
// CHECK-32-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-32-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-32-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK-32-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK-32-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK-32-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK-32-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK-32-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK-32-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK-32-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK-32-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK-32-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK-32-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK-32-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP107]], align 4
// CHECK-32-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP108]], align 4
// CHECK-32-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK-32-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK-32-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK-32-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK-32-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK-32-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK-32-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK-32-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK-32-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK-32-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK-32-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK-32:       omp_offload.failed6:
// CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
// CHECK-32:       omp_offload.cont7:
// CHECK-32-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK-32-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK-32-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK-32-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK-32-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK-32-NEXT:    store i8* null, i8** [[TMP121]], align 4
// CHECK-32-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK-32-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK-32-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK-32-NEXT:    store i8* null, i8** [[TMP126]], align 4
// CHECK-32-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK-32-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK-32-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK-32-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK-32-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK-32-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK-32-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK-32-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK-32-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK-32-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK-32-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK-32-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK-32-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP135]], align 4
// CHECK-32-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP136]], align 4
// CHECK-32-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK-32-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK-32-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK-32-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK-32-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK-32-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK-32-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK-32-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK-32-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK-32-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK-32-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK-32:       omp_offload.failed12:
// CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
// CHECK-32:       omp_offload.cont13:
// CHECK-32-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK-32-NEXT:    ret i32 [[TMP144]]
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK-32-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK-32-NEXT:    store i64 1, i64* [[X]], align 4
// CHECK-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK-32-NEXT:    store i8 1, i8* [[Y]], align 4
// CHECK-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK-32-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK-32-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK-32-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK-32-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK-32-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK-32-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK-32-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK-32-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK-32-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK-32-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK-32-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK-32-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK-32-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK-32-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    ret i32 [[TMP9]]
// CHECK-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK-32-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK-32-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK-32-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK-32-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK-32-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK-32-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK-32-NEXT:    store double* [[A]], double** [[TMP13]], align 4
// CHECK-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT:    store i8* null, i8** [[TMP14]], align 4
// CHECK-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT:    store i8* null, i8** [[TMP19]], align 4
// CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK-32-NEXT:    store i32 2, i32* [[TMP23]], align 4
// CHECK-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8* null, i8** [[TMP24]], align 4
// CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK-32-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK-32-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK-32-NEXT:    store i8* null, i8** [[TMP29]], align 4
// CHECK-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK-32-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK-32-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK-32-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK-32-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK-32-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK-32-NEXT:    store i8* null, i8** [[TMP35]], align 4
// CHECK-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK-32-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK-32-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK-32-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK-32-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK-32-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP45]], align 4
// CHECK-32-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP46]], align 4
// CHECK-32-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK-32-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK-32-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK-32-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK-32-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK-32-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32:       omp_offload.failed:
// CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-32:       omp_offload.cont:
// CHECK-32-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK-32-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK-32-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK-32-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK-32-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK-32-NEXT:    ret i32 [[ADD3]]
// CHECK-32-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK-32-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK-32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT:    store i8* null, i8** [[TMP8]], align 4
// CHECK-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT:    store i8* null, i8** [[TMP13]], align 4
// CHECK-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8* null, i8** [[TMP18]], align 4
// CHECK-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP27]], align 4
// CHECK-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP28]], align 4
// CHECK-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK-32-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK-32-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32:       omp_offload.failed:
// CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-32:       omp_offload.cont:
// CHECK-32-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    ret i32 [[TMP36]]
// CHECK-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK-32-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK-32-NEXT:    store i8* null, i8** [[TMP6]], align 4
// CHECK-32-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK-32-NEXT:    store i8* null, i8** [[TMP11]], align 4
// CHECK-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK-32-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK-32-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK-32-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK-32-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP20]], align 4
// CHECK-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK-32-NEXT:    store i8** null, i8*** [[TMP21]], align 4
// CHECK-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK-32-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK-32-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK-32-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK-32-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK-32-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-32-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK-32:       omp_offload.failed:
// CHECK-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK-32:       omp_offload.cont:
// CHECK-32-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK-32-NEXT:    ret i32 [[TMP29]]
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-32-NEXT:    store double [[ADD]], double* [[A]], align 4
// CHECK-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK-32-NEXT:    store double [[INC]], double* [[A4]], align 4
// CHECK-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK-32-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK-32-NEXT:    ret void
// CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK-32-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK-32-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK0-64-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-64-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-64-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK0-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK0-64-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK0-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-64-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// CHECK0-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK0-64-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK0-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-64-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-64-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK0-64-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK0-64-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK0-64-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK0-64-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK0-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK0-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK0-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP17]], align 8
// CHECK0-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK0-64-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK0-64-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP22]], align 8
// CHECK0-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK0-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP27]], align 8
// CHECK0-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP30]], align 4
// CHECK0-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT:    store i32 3, i32* [[TMP31]], align 4
// CHECK0-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK0-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK0-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK0-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK0-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP36]], align 8
// CHECK0-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP37]], align 8
// CHECK0-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP38]], align 8
// CHECK0-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP39]], align 8
// CHECK0-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK0-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK0-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT:    store i32 0, i32* [[TMP42]], align 4
// CHECK0-64-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK0-64-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64:       omp_offload.failed:
// CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64:       omp_offload.cont:
// CHECK0-64-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK0-64-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK0-64-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK0-64-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK0-64-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-64-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-64-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK0-64-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK0-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK0-64-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK0-64-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP55]], align 8
// CHECK0-64-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK0-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK0-64-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK0-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK0-64-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP60]], align 8
// CHECK0-64-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK0-64-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK0-64-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP65]], align 8
// CHECK0-64-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK0-64-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK0-64-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
// CHECK0-64-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK0-64-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK0-64-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
// CHECK0-64-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK0-64-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK0-64-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP71]], align 8
// CHECK0-64-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK0-64-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK0-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK0-64-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK0-64-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK0-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK0-64-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP76]], align 8
// CHECK0-64-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK0-64-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK0-64-NEXT:    store i64 5, i64* [[TMP78]], align 8
// CHECK0-64-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK0-64-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK0-64-NEXT:    store i64 5, i64* [[TMP80]], align 8
// CHECK0-64-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP81]], align 8
// CHECK0-64-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK0-64-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK0-64-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK0-64-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK0-64-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP86]], align 8
// CHECK0-64-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK0-64-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK0-64-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK0-64-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK0-64-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK0-64-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK0-64-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK0-64-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK0-64-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP92]], align 8
// CHECK0-64-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK0-64-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK0-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK0-64-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK0-64-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK0-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK0-64-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP97]], align 8
// CHECK0-64-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-64-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-64-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK0-64-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK0-64-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK0-64-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK0-64-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK0-64-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK0-64-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK0-64-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK0-64-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK0-64-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP107]], align 8
// CHECK0-64-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP108]], align 8
// CHECK0-64-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK0-64-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK0-64-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK0-64-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK0-64-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK0-64-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK0-64-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK0-64-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK0-64-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK0-64:       omp_offload.failed8:
// CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
// CHECK0-64:       omp_offload.cont9:
// CHECK0-64-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK0-64-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK0-64-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK0-64-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK0-64-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP121]], align 8
// CHECK0-64-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK0-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK0-64-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK0-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK0-64-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP126]], align 8
// CHECK0-64-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK0-64-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-64-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK0-64-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK0-64-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK0-64-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK0-64-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK0-64-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK0-64-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK0-64-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK0-64-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP135]], align 8
// CHECK0-64-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP136]], align 8
// CHECK0-64-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK0-64-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK0-64-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK0-64-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK0-64-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK0-64-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK0-64-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK0-64-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK0-64-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK0-64:       omp_offload.failed14:
// CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
// CHECK0-64:       omp_offload.cont15:
// CHECK0-64-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK0-64-NEXT:    ret i32 [[TMP144]]
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK0-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK0-64-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK0-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK0-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK0-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK0-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK0-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK0-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK0-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK0-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK0-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK0-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK0-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK0-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK0-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK0-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK0-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK0-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK0-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK0-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK0-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK0-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK0-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK0-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK0-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK0-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK0-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK0-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK0-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK0-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK0-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK0-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK0-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK0-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK0-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK0-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK0-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK0-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK0-64-NEXT:    store i64 1, i64* [[X]], align 8
// CHECK0-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK0-64-NEXT:    store i8 1, i8* [[Y]], align 8
// CHECK0-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK0-64-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK0-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK0-64-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK0-64-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK0-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK0-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK0-64-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK0-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK0-64-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK0-64-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK0-64-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK0-64-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK0-64-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-64-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK0-64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK0-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK0-64-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK0-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK0-64-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK0-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK0-64-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    ret i32 [[TMP9]]
// CHECK0-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK0-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK0-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK0-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK0-64-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-64-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK0-64-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK0-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-64-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK0-64-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK0-64-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK0-64-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK0-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK0-64-NEXT:    store double* [[A]], double** [[TMP13]], align 8
// CHECK0-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP14]], align 8
// CHECK0-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK0-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK0-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP19]], align 8
// CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK0-64-NEXT:    store i64 2, i64* [[TMP21]], align 8
// CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK0-64-NEXT:    store i64 2, i64* [[TMP23]], align 8
// CHECK0-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP24]], align 8
// CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK0-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK0-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK0-64-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK0-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP29]], align 8
// CHECK0-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK0-64-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK0-64-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK0-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK0-64-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK0-64-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK0-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK0-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP35]], align 8
// CHECK0-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK0-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK0-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK0-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK0-64-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK0-64-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK0-64-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP45]], align 8
// CHECK0-64-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP46]], align 8
// CHECK0-64-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK0-64-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK0-64-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK0-64-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK0-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK0-64-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK0-64-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64:       omp_offload.failed:
// CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64:       omp_offload.cont:
// CHECK0-64-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK0-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK0-64-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK0-64-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK0-64-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK0-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK0-64-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK0-64-NEXT:    ret i32 [[ADD4]]
// CHECK0-64-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK0-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-64-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK0-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK0-64-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK0-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK0-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK0-64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP8]], align 8
// CHECK0-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK0-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK0-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP13]], align 8
// CHECK0-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK0-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-64-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK0-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP18]], align 8
// CHECK0-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK0-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK0-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK0-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK0-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP27]], align 8
// CHECK0-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP28]], align 8
// CHECK0-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK0-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK0-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK0-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK0-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK0-64-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK0-64-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64:       omp_offload.failed:
// CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64:       omp_offload.cont:
// CHECK0-64-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    ret i32 [[TMP36]]
// CHECK0-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-64-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK0-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK0-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP6]], align 8
// CHECK0-64-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK0-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK0-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-64-NEXT:    store i8* null, i8** [[TMP11]], align 8
// CHECK0-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK0-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-64-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK0-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-64-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK0-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-64-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK0-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK0-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK0-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP20]], align 8
// CHECK0-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-64-NEXT:    store i8** null, i8*** [[TMP21]], align 8
// CHECK0-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK0-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-64-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK0-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK0-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK0-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-64-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK0-64-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-64-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK0-64-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0-64:       omp_offload.failed:
// CHECK0-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK0-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0-64:       omp_offload.cont:
// CHECK0-64-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-64-NEXT:    ret i32 [[TMP29]]
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK0-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK0-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK0-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK0-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK0-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK0-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK0-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK0-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK0-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK0-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK0-64-NEXT:    store double [[ADD]], double* [[A]], align 8
// CHECK0-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK0-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK0-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK0-64-NEXT:    store double [[INC]], double* [[A5]], align 8
// CHECK0-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK0-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK0-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK0-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK0-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK0-64-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK0-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK0-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK0-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK0-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK0-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK0-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK0-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK0-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK0-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK0-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK0-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK0-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK0-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK0-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK0-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK0-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK0-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK0-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK0-64-NEXT:    ret void
// CHECK0-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK0-64-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK0-64-NEXT:  entry:
// CHECK0-64-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK0-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK1-64-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-64-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-64-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK1-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK1-64-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK1-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-64-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// CHECK1-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK1-64-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK1-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-64-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-64-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK1-64-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK1-64-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK1-64-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK1-64-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK1-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK1-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK1-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP17]], align 8
// CHECK1-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK1-64-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK1-64-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP22]], align 8
// CHECK1-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK1-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP27]], align 8
// CHECK1-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP30]], align 4
// CHECK1-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT:    store i32 3, i32* [[TMP31]], align 4
// CHECK1-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK1-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK1-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK1-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK1-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP36]], align 8
// CHECK1-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP37]], align 8
// CHECK1-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP38]], align 8
// CHECK1-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP39]], align 8
// CHECK1-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK1-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK1-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT:    store i32 0, i32* [[TMP42]], align 4
// CHECK1-64-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK1-64-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64:       omp_offload.failed:
// CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64:       omp_offload.cont:
// CHECK1-64-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK1-64-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-64-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK1-64-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK1-64-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-64-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-64-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK1-64-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK1-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK1-64-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK1-64-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP55]], align 8
// CHECK1-64-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK1-64-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK1-64-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP60]], align 8
// CHECK1-64-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK1-64-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK1-64-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP65]], align 8
// CHECK1-64-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK1-64-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK1-64-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
// CHECK1-64-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK1-64-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK1-64-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
// CHECK1-64-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK1-64-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK1-64-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP71]], align 8
// CHECK1-64-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK1-64-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK1-64-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK1-64-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK1-64-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP76]], align 8
// CHECK1-64-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK1-64-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK1-64-NEXT:    store i64 5, i64* [[TMP78]], align 8
// CHECK1-64-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK1-64-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK1-64-NEXT:    store i64 5, i64* [[TMP80]], align 8
// CHECK1-64-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP81]], align 8
// CHECK1-64-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK1-64-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK1-64-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK1-64-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK1-64-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP86]], align 8
// CHECK1-64-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK1-64-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK1-64-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK1-64-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK1-64-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK1-64-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK1-64-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK1-64-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK1-64-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP92]], align 8
// CHECK1-64-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK1-64-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK1-64-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK1-64-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK1-64-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP97]], align 8
// CHECK1-64-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-64-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-64-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK1-64-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK1-64-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK1-64-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK1-64-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK1-64-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK1-64-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK1-64-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK1-64-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK1-64-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP107]], align 8
// CHECK1-64-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP108]], align 8
// CHECK1-64-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK1-64-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK1-64-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK1-64-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK1-64-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK1-64-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK1-64-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK1-64-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK1-64-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK1-64:       omp_offload.failed8:
// CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
// CHECK1-64:       omp_offload.cont9:
// CHECK1-64-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK1-64-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK1-64-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK1-64-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK1-64-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP121]], align 8
// CHECK1-64-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK1-64-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK1-64-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP126]], align 8
// CHECK1-64-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK1-64-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-64-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK1-64-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK1-64-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK1-64-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK1-64-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK1-64-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK1-64-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK1-64-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK1-64-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP135]], align 8
// CHECK1-64-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP136]], align 8
// CHECK1-64-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK1-64-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK1-64-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK1-64-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK1-64-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK1-64-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK1-64-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK1-64-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK1-64-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK1-64:       omp_offload.failed14:
// CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
// CHECK1-64:       omp_offload.cont15:
// CHECK1-64-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK1-64-NEXT:    ret i32 [[TMP144]]
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK1-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK1-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK1-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK1-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK1-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK1-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK1-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK1-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK1-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK1-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK1-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK1-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK1-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK1-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK1-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK1-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK1-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK1-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK1-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK1-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK1-64-NEXT:    store i64 1, i64* [[X]], align 8
// CHECK1-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK1-64-NEXT:    store i8 1, i8* [[Y]], align 8
// CHECK1-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK1-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK1-64-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK1-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK1-64-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK1-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK1-64-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK1-64-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK1-64-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK1-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-64-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK1-64-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK1-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK1-64-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK1-64-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK1-64-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK1-64-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK1-64-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    ret i32 [[TMP9]]
// CHECK1-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK1-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK1-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK1-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK1-64-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-64-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK1-64-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-64-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK1-64-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK1-64-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK1-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK1-64-NEXT:    store double* [[A]], double** [[TMP13]], align 8
// CHECK1-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP14]], align 8
// CHECK1-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK1-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK1-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP19]], align 8
// CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK1-64-NEXT:    store i64 2, i64* [[TMP21]], align 8
// CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK1-64-NEXT:    store i64 2, i64* [[TMP23]], align 8
// CHECK1-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP24]], align 8
// CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-64-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK1-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-64-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK1-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP29]], align 8
// CHECK1-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-64-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK1-64-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK1-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-64-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK1-64-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK1-64-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK1-64-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP35]], align 8
// CHECK1-64-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK1-64-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK1-64-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK1-64-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK1-64-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK1-64-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK1-64-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP45]], align 8
// CHECK1-64-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP46]], align 8
// CHECK1-64-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK1-64-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK1-64-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK1-64-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK1-64-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK1-64-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK1-64-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64:       omp_offload.failed:
// CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64:       omp_offload.cont:
// CHECK1-64-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK1-64-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK1-64-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK1-64-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK1-64-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK1-64-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK1-64-NEXT:    ret i32 [[ADD4]]
// CHECK1-64-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK1-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-64-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK1-64-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK1-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK1-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK1-64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP8]], align 8
// CHECK1-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK1-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK1-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP13]], align 8
// CHECK1-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK1-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-64-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK1-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP18]], align 8
// CHECK1-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK1-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK1-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK1-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK1-64-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP27]], align 8
// CHECK1-64-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP28]], align 8
// CHECK1-64-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK1-64-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK1-64-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK1-64-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK1-64-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK1-64-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK1-64-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64:       omp_offload.failed:
// CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64:       omp_offload.cont:
// CHECK1-64-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    ret i32 [[TMP36]]
// CHECK1-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-64-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-64-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-64-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-64-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-64-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK1-64-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK1-64-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP6]], align 8
// CHECK1-64-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK1-64-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-64-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK1-64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-64-NEXT:    store i8* null, i8** [[TMP11]], align 8
// CHECK1-64-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-64-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-64-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK1-64-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-64-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK1-64-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-64-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK1-64-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-64-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK1-64-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK1-64-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-64-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK1-64-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP20]], align 8
// CHECK1-64-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-64-NEXT:    store i8** null, i8*** [[TMP21]], align 8
// CHECK1-64-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK1-64-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-64-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK1-64-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-64-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK1-64-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-64-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK1-64-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-64-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK1-64-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-64-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK1-64-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1-64:       omp_offload.failed:
// CHECK1-64-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK1-64-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1-64:       omp_offload.cont:
// CHECK1-64-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-64-NEXT:    ret i32 [[TMP29]]
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK1-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK1-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK1-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK1-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK1-64-NEXT:    store double [[ADD]], double* [[A]], align 8
// CHECK1-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK1-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK1-64-NEXT:    store double [[INC]], double* [[A5]], align 8
// CHECK1-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK1-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK1-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK1-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK1-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK1-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK1-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK1-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK1-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK1-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK1-64-NEXT:    ret void
// CHECK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-64-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-64-NEXT:  entry:
// CHECK1-64-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK1-64-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK2-32-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-32-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-32-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK2-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-32-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// CHECK2-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK2-32-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK2-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK2-32-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK2-32-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK2-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK2-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP15]], align 4
// CHECK2-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK2-32-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK2-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK2-32-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP20]], align 4
// CHECK2-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK2-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP25]], align 4
// CHECK2-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP28]], align 4
// CHECK2-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i32 3, i32* [[TMP29]], align 4
// CHECK2-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK2-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK2-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK2-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK2-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP34]], align 4
// CHECK2-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP35]], align 4
// CHECK2-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP36]], align 8
// CHECK2-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP37]], align 8
// CHECK2-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK2-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK2-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT:    store i32 0, i32* [[TMP40]], align 4
// CHECK2-32-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK2-32-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32:       omp_offload.failed:
// CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32:       omp_offload.cont:
// CHECK2-32-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-32-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK2-32-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK2-32-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK2-32-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-32-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK2-32-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK2-32-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK2-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK2-32-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK2-32-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP55]], align 4
// CHECK2-32-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK2-32-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK2-32-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP60]], align 4
// CHECK2-32-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK2-32-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK2-32-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP65]], align 4
// CHECK2-32-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK2-32-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK2-32-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
// CHECK2-32-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK2-32-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK2-32-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
// CHECK2-32-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK2-32-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK2-32-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP71]], align 4
// CHECK2-32-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK2-32-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK2-32-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK2-32-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK2-32-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP76]], align 4
// CHECK2-32-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK2-32-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK2-32-NEXT:    store i32 5, i32* [[TMP78]], align 4
// CHECK2-32-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK2-32-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK2-32-NEXT:    store i32 5, i32* [[TMP80]], align 4
// CHECK2-32-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP81]], align 4
// CHECK2-32-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK2-32-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK2-32-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK2-32-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK2-32-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP86]], align 4
// CHECK2-32-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK2-32-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK2-32-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK2-32-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK2-32-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK2-32-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK2-32-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK2-32-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK2-32-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP92]], align 4
// CHECK2-32-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK2-32-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK2-32-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK2-32-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK2-32-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP97]], align 4
// CHECK2-32-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-32-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-32-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK2-32-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK2-32-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK2-32-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK2-32-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK2-32-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK2-32-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK2-32-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP107]], align 4
// CHECK2-32-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP108]], align 4
// CHECK2-32-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK2-32-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK2-32-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK2-32-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK2-32-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK2-32-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK2-32-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK2-32-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK2-32-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK2-32:       omp_offload.failed6:
// CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
// CHECK2-32:       omp_offload.cont7:
// CHECK2-32-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK2-32-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK2-32-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK2-32-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK2-32-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP121]], align 4
// CHECK2-32-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK2-32-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK2-32-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP126]], align 4
// CHECK2-32-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-32-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-32-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK2-32-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK2-32-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK2-32-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK2-32-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK2-32-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK2-32-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP135]], align 4
// CHECK2-32-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP136]], align 4
// CHECK2-32-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK2-32-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK2-32-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK2-32-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK2-32-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK2-32-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK2-32-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK2-32-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK2-32-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK2-32:       omp_offload.failed12:
// CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
// CHECK2-32:       omp_offload.cont13:
// CHECK2-32-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK2-32-NEXT:    ret i32 [[TMP144]]
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK2-32-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK2-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK2-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK2-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK2-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK2-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK2-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK2-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK2-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK2-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK2-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK2-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK2-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK2-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK2-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK2-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK2-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK2-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK2-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK2-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK2-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK2-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK2-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK2-32-NEXT:    store i64 1, i64* [[X]], align 4
// CHECK2-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8 1, i8* [[Y]], align 4
// CHECK2-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK2-32-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK2-32-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK2-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK2-32-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK2-32-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK2-32-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-32-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK2-32-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK2-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK2-32-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK2-32-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK2-32-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    ret i32 [[TMP9]]
// CHECK2-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK2-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK2-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-32-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK2-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-32-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK2-32-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK2-32-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK2-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK2-32-NEXT:    store double* [[A]], double** [[TMP13]], align 4
// CHECK2-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP14]], align 4
// CHECK2-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK2-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK2-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP19]], align 4
// CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK2-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP23]], align 4
// CHECK2-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP24]], align 4
// CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK2-32-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK2-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK2-32-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK2-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP29]], align 4
// CHECK2-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK2-32-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK2-32-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK2-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK2-32-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK2-32-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK2-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK2-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP35]], align 4
// CHECK2-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK2-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK2-32-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK2-32-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK2-32-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK2-32-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK2-32-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP45]], align 4
// CHECK2-32-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP46]], align 4
// CHECK2-32-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK2-32-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK2-32-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK2-32-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK2-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK2-32-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK2-32-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32:       omp_offload.failed:
// CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32:       omp_offload.cont:
// CHECK2-32-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK2-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK2-32-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK2-32-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK2-32-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK2-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK2-32-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK2-32-NEXT:    ret i32 [[ADD3]]
// CHECK2-32-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK2-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK2-32-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK2-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK2-32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP8]], align 4
// CHECK2-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK2-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK2-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP13]], align 4
// CHECK2-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK2-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK2-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP18]], align 4
// CHECK2-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK2-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK2-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK2-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK2-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK2-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP27]], align 4
// CHECK2-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP28]], align 4
// CHECK2-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK2-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK2-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK2-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK2-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK2-32-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK2-32-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32:       omp_offload.failed:
// CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32:       omp_offload.cont:
// CHECK2-32-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    ret i32 [[TMP36]]
// CHECK2-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-32-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK2-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK2-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP6]], align 4
// CHECK2-32-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK2-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK2-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i8* null, i8** [[TMP11]], align 4
// CHECK2-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK2-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-32-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK2-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-32-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK2-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-32-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK2-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK2-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK2-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP20]], align 4
// CHECK2-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-32-NEXT:    store i8** null, i8*** [[TMP21]], align 4
// CHECK2-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK2-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-32-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK2-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK2-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK2-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-32-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK2-32-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-32-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK2-32-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2-32:       omp_offload.failed:
// CHECK2-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK2-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2-32:       omp_offload.cont:
// CHECK2-32-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-32-NEXT:    ret i32 [[TMP29]]
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK2-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK2-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK2-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK2-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK2-32-NEXT:    store double [[ADD]], double* [[A]], align 4
// CHECK2-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK2-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK2-32-NEXT:    store double [[INC]], double* [[A4]], align 4
// CHECK2-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK2-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK2-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK2-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK2-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK2-32-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK2-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK2-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK2-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK2-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK2-32-NEXT:    ret void
// CHECK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK2-32-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-32-NEXT:  entry:
// CHECK2-32-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK2-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK3-32-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-32-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-32-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK3-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-32-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// CHECK3-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK3-32-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK3-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK3-32-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK3-32-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK3-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK3-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP15]], align 4
// CHECK3-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK3-32-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK3-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK3-32-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP20]], align 4
// CHECK3-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK3-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP25]], align 4
// CHECK3-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP28]], align 4
// CHECK3-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i32 3, i32* [[TMP29]], align 4
// CHECK3-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK3-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK3-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK3-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK3-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP34]], align 4
// CHECK3-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP35]], align 4
// CHECK3-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP36]], align 8
// CHECK3-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP37]], align 8
// CHECK3-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK3-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK3-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT:    store i32 0, i32* [[TMP40]], align 4
// CHECK3-32-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK3-32-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32:       omp_offload.failed:
// CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32:       omp_offload.cont:
// CHECK3-32-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-32-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK3-32-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK3-32-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK3-32-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-32-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK3-32-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK3-32-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK3-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK3-32-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK3-32-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP55]], align 4
// CHECK3-32-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK3-32-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK3-32-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP60]], align 4
// CHECK3-32-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK3-32-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK3-32-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP65]], align 4
// CHECK3-32-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK3-32-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK3-32-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
// CHECK3-32-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK3-32-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK3-32-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
// CHECK3-32-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK3-32-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK3-32-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP71]], align 4
// CHECK3-32-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK3-32-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK3-32-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK3-32-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK3-32-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP76]], align 4
// CHECK3-32-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK3-32-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK3-32-NEXT:    store i32 5, i32* [[TMP78]], align 4
// CHECK3-32-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK3-32-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK3-32-NEXT:    store i32 5, i32* [[TMP80]], align 4
// CHECK3-32-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP81]], align 4
// CHECK3-32-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK3-32-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK3-32-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK3-32-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK3-32-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP86]], align 4
// CHECK3-32-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK3-32-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK3-32-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK3-32-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK3-32-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK3-32-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK3-32-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK3-32-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK3-32-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP92]], align 4
// CHECK3-32-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK3-32-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK3-32-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK3-32-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK3-32-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP97]], align 4
// CHECK3-32-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-32-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-32-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK3-32-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK3-32-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK3-32-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK3-32-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK3-32-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK3-32-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK3-32-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP107]], align 4
// CHECK3-32-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP108]], align 4
// CHECK3-32-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK3-32-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK3-32-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK3-32-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK3-32-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK3-32-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK3-32-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK3-32-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK3-32-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK3-32:       omp_offload.failed6:
// CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
// CHECK3-32:       omp_offload.cont7:
// CHECK3-32-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK3-32-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK3-32-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK3-32-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK3-32-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP121]], align 4
// CHECK3-32-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK3-32-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK3-32-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP126]], align 4
// CHECK3-32-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-32-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-32-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK3-32-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK3-32-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK3-32-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK3-32-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK3-32-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK3-32-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP135]], align 4
// CHECK3-32-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP136]], align 4
// CHECK3-32-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK3-32-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK3-32-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK3-32-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK3-32-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK3-32-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK3-32-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK3-32-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK3-32-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK3-32:       omp_offload.failed12:
// CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
// CHECK3-32:       omp_offload.cont13:
// CHECK3-32-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK3-32-NEXT:    ret i32 [[TMP144]]
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK3-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK3-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK3-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK3-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK3-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK3-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK3-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK3-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK3-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK3-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK3-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK3-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK3-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK3-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK3-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK3-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK3-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK3-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK3-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK3-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK3-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK3-32-NEXT:    store i64 1, i64* [[X]], align 4
// CHECK3-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8 1, i8* [[Y]], align 4
// CHECK3-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK3-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK3-32-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK3-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK3-32-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK3-32-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK3-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-32-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK3-32-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK3-32-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK3-32-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK3-32-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK3-32-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    ret i32 [[TMP9]]
// CHECK3-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK3-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK3-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-32-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK3-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-32-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK3-32-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK3-32-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK3-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK3-32-NEXT:    store double* [[A]], double** [[TMP13]], align 4
// CHECK3-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP14]], align 4
// CHECK3-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK3-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK3-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP19]], align 4
// CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK3-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP23]], align 4
// CHECK3-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP24]], align 4
// CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-32-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK3-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-32-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK3-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP29]], align 4
// CHECK3-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-32-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK3-32-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK3-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-32-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK3-32-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK3-32-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK3-32-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP35]], align 4
// CHECK3-32-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK3-32-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK3-32-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK3-32-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK3-32-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK3-32-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK3-32-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP45]], align 4
// CHECK3-32-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP46]], align 4
// CHECK3-32-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK3-32-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK3-32-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK3-32-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK3-32-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK3-32-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK3-32-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32:       omp_offload.failed:
// CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32:       omp_offload.cont:
// CHECK3-32-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK3-32-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK3-32-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK3-32-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK3-32-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK3-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK3-32-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK3-32-NEXT:    ret i32 [[ADD3]]
// CHECK3-32-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK3-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK3-32-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK3-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK3-32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP8]], align 4
// CHECK3-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK3-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK3-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP13]], align 4
// CHECK3-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK3-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK3-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP18]], align 4
// CHECK3-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK3-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK3-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK3-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK3-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK3-32-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP27]], align 4
// CHECK3-32-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP28]], align 4
// CHECK3-32-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK3-32-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK3-32-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK3-32-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK3-32-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK3-32-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK3-32-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32:       omp_offload.failed:
// CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32:       omp_offload.cont:
// CHECK3-32-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    ret i32 [[TMP36]]
// CHECK3-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-32-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-32-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK3-32-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK3-32-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP6]], align 4
// CHECK3-32-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK3-32-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-32-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK3-32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i8* null, i8** [[TMP11]], align 4
// CHECK3-32-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-32-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-32-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK3-32-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-32-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK3-32-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-32-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK3-32-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-32-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK3-32-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK3-32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-32-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK3-32-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP20]], align 4
// CHECK3-32-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-32-NEXT:    store i8** null, i8*** [[TMP21]], align 4
// CHECK3-32-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK3-32-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-32-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK3-32-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-32-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK3-32-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-32-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK3-32-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-32-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK3-32-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-32-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK3-32-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3-32:       omp_offload.failed:
// CHECK3-32-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK3-32-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3-32:       omp_offload.cont:
// CHECK3-32-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-32-NEXT:    ret i32 [[TMP29]]
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK3-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK3-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK3-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK3-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK3-32-NEXT:    store double [[ADD]], double* [[A]], align 4
// CHECK3-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK3-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK3-32-NEXT:    store double [[INC]], double* [[A4]], align 4
// CHECK3-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK3-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK3-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK3-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK3-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK3-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK3-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK3-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK3-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK3-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK3-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK3-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK3-32-NEXT:    ret void
// CHECK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK3-32-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK3-32-NEXT:  entry:
// CHECK3-32-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK3-32-NEXT:    ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK-64-NEXT:  entry:
// TCHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// TCHECK-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// TCHECK-64-NEXT:    ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT:  entry:
// TCHECK-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// TCHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// TCHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// TCHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// TCHECK-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// TCHECK-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// TCHECK-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// TCHECK-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// TCHECK-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// TCHECK-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// TCHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// TCHECK-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// TCHECK-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// TCHECK-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// TCHECK-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// TCHECK-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// TCHECK-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// TCHECK-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// TCHECK-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// TCHECK-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// TCHECK-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// TCHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// TCHECK-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// TCHECK-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// TCHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// TCHECK-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// TCHECK-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK-64-NEXT:    store i64 1, i64* [[X]], align 8
// TCHECK-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK-64-NEXT:    store i8 1, i8* [[Y]], align 8
// TCHECK-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK-64-NEXT:    ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT:  entry:
// TCHECK-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// TCHECK-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// TCHECK-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// TCHECK-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// TCHECK-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK-64-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
// TCHECK-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// TCHECK-64-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK-64-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
// TCHECK-64-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
// TCHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK-64-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 8
// TCHECK-64-NEXT:    ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT:  entry:
// TCHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// TCHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// TCHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// TCHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// TCHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// TCHECK-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// TCHECK-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// TCHECK-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// TCHECK-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT:    ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT:  entry:
// TCHECK-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// TCHECK-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// TCHECK-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// TCHECK-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// TCHECK-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// TCHECK-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK-64-NEXT:    store double [[ADD]], double* [[A]], align 8
// TCHECK-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// TCHECK-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK-64-NEXT:    store double [[INC]], double* [[A5]], align 8
// TCHECK-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// TCHECK-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// TCHECK-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// TCHECK-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// TCHECK-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK-64-NEXT:    ret void
// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-64-NEXT:  entry:
// TCHECK-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// TCHECK-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK-64-NEXT:    ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK1-64-NEXT:  entry:
// TCHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// TCHECK1-64-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-64-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// TCHECK1-64-NEXT:    ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT:  entry:
// TCHECK1-64-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// TCHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// TCHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// TCHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// TCHECK1-64-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// TCHECK1-64-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK1-64-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// TCHECK1-64-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// TCHECK1-64-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// TCHECK1-64-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// TCHECK1-64-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// TCHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// TCHECK1-64-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// TCHECK1-64-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK1-64-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK1-64-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK1-64-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK1-64-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// TCHECK1-64-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK1-64-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// TCHECK1-64-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-64-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// TCHECK1-64-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// TCHECK1-64-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// TCHECK1-64-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-64-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// TCHECK1-64-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK1-64-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// TCHECK1-64-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK1-64-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// TCHECK1-64-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK1-64-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK1-64-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK1-64-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// TCHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// TCHECK1-64-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK1-64-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// TCHECK1-64-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// TCHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK1-64-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK1-64-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// TCHECK1-64-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// TCHECK1-64-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK1-64-NEXT:    store i64 1, i64* [[X]], align 8
// TCHECK1-64-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK1-64-NEXT:    store i8 1, i8* [[Y]], align 8
// TCHECK1-64-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK1-64-NEXT:    ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT:  entry:
// TCHECK1-64-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// TCHECK1-64-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// TCHECK1-64-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// TCHECK1-64-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// TCHECK1-64-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK1-64-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK1-64-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK1-64-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
// TCHECK1-64-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// TCHECK1-64-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK1-64-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
// TCHECK1-64-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
// TCHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK1-64-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 8
// TCHECK1-64-NEXT:    ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT:  entry:
// TCHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK1-64-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// TCHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// TCHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-64-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// TCHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// TCHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK1-64-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// TCHECK1-64-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK1-64-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// TCHECK1-64-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// TCHECK1-64-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// TCHECK1-64-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK1-64-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT:    ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT:  entry:
// TCHECK1-64-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// TCHECK1-64-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK1-64-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// TCHECK1-64-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK1-64-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK1-64-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-64-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK1-64-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK1-64-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// TCHECK1-64-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-64-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK1-64-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK1-64-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// TCHECK1-64-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-64-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK1-64-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// TCHECK1-64-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK1-64-NEXT:    store double [[ADD]], double* [[A]], align 8
// TCHECK1-64-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK1-64-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// TCHECK1-64-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK1-64-NEXT:    store double [[INC]], double* [[A5]], align 8
// TCHECK1-64-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// TCHECK1-64-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// TCHECK1-64-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// TCHECK1-64-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// TCHECK1-64-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK1-64-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK1-64-NEXT:    ret void
// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-64-NEXT:  entry:
// TCHECK1-64-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-64-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK1-64-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK1-64-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-64-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-64-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-64-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK1-64-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK1-64-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK1-64-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-64-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-64-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK1-64-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// TCHECK1-64-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK1-64-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK1-64-NEXT:    ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK2-32-NEXT:  entry:
// TCHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// TCHECK2-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// TCHECK2-32-NEXT:    ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT:  entry:
// TCHECK2-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// TCHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// TCHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// TCHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// TCHECK2-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// TCHECK2-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK2-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// TCHECK2-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// TCHECK2-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// TCHECK2-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// TCHECK2-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// TCHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// TCHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// TCHECK2-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// TCHECK2-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK2-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK2-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK2-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK2-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// TCHECK2-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK2-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// TCHECK2-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// TCHECK2-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// TCHECK2-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// TCHECK2-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// TCHECK2-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK2-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// TCHECK2-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK2-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// TCHECK2-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK2-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK2-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK2-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// TCHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// TCHECK2-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK2-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// TCHECK2-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// TCHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK2-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK2-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// TCHECK2-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// TCHECK2-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK2-32-NEXT:    store i64 1, i64* [[X]], align 4
// TCHECK2-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK2-32-NEXT:    store i8 1, i8* [[Y]], align 4
// TCHECK2-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK2-32-NEXT:    ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT:  entry:
// TCHECK2-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// TCHECK2-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// TCHECK2-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// TCHECK2-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// TCHECK2-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK2-32-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
// TCHECK2-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK2-32-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
// TCHECK2-32-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
// TCHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK2-32-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 4
// TCHECK2-32-NEXT:    ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT:  entry:
// TCHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// TCHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// TCHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// TCHECK2-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK2-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// TCHECK2-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// TCHECK2-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK2-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK2-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT:    ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT:  entry:
// TCHECK2-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// TCHECK2-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK2-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK2-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK2-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK2-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK2-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// TCHECK2-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK2-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK2-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// TCHECK2-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK2-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK2-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK2-32-NEXT:    store double [[ADD]], double* [[A]], align 4
// TCHECK2-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK2-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// TCHECK2-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK2-32-NEXT:    store double [[INC]], double* [[A4]], align 4
// TCHECK2-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK2-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// TCHECK2-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// TCHECK2-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// TCHECK2-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK2-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK2-32-NEXT:    ret void
// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-32-NEXT:  entry:
// TCHECK2-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK2-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK2-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK2-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK2-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK2-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK2-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK2-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK2-32-NEXT:    ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK3-32-NEXT:  entry:
// TCHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// TCHECK3-32-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// TCHECK3-32-NEXT:    ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT:  entry:
// TCHECK3-32-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// TCHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// TCHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// TCHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// TCHECK3-32-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// TCHECK3-32-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK3-32-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// TCHECK3-32-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// TCHECK3-32-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// TCHECK3-32-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// TCHECK3-32-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// TCHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// TCHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// TCHECK3-32-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// TCHECK3-32-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK3-32-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK3-32-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK3-32-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK3-32-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// TCHECK3-32-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK3-32-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// TCHECK3-32-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-32-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// TCHECK3-32-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// TCHECK3-32-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// TCHECK3-32-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-32-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// TCHECK3-32-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK3-32-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// TCHECK3-32-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK3-32-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// TCHECK3-32-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK3-32-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK3-32-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK3-32-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// TCHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// TCHECK3-32-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK3-32-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// TCHECK3-32-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// TCHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK3-32-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK3-32-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// TCHECK3-32-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// TCHECK3-32-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK3-32-NEXT:    store i64 1, i64* [[X]], align 4
// TCHECK3-32-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK3-32-NEXT:    store i8 1, i8* [[Y]], align 4
// TCHECK3-32-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK3-32-NEXT:    ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT:  entry:
// TCHECK3-32-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// TCHECK3-32-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// TCHECK3-32-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// TCHECK3-32-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// TCHECK3-32-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK3-32-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
// TCHECK3-32-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK3-32-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
// TCHECK3-32-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
// TCHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK3-32-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 4
// TCHECK3-32-NEXT:    ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT:  entry:
// TCHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// TCHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// TCHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// TCHECK3-32-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK3-32-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// TCHECK3-32-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// TCHECK3-32-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK3-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK3-32-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT:    ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT:  entry:
// TCHECK3-32-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// TCHECK3-32-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK3-32-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK3-32-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK3-32-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-32-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK3-32-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK3-32-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// TCHECK3-32-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-32-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK3-32-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK3-32-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// TCHECK3-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK3-32-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK3-32-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK3-32-NEXT:    store double [[ADD]], double* [[A]], align 4
// TCHECK3-32-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK3-32-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// TCHECK3-32-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK3-32-NEXT:    store double [[INC]], double* [[A4]], align 4
// TCHECK3-32-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK3-32-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// TCHECK3-32-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// TCHECK3-32-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// TCHECK3-32-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK3-32-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK3-32-NEXT:    ret void
// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-32-NEXT:  entry:
// TCHECK3-32-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-32-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK3-32-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-32-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-32-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK3-32-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK3-32-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK3-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-32-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK3-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK3-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK3-32-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK3-32-NEXT:    ret void
// CHECK0-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK0-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK0-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK0-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK0-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK0-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK0-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK0-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK0-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// CHECK0-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK0-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK0-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK0-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK0-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK0-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK0-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK0-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK0-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK0-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT:    store i8* null, i8** [[TMP17]], align 8
// CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK0-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK0-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT:    store i8* null, i8** [[TMP22]], align 8
// CHECK0-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK0-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK0-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK0-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-NEXT:    store i8* null, i8** [[TMP27]], align 8
// CHECK0-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT:    store i32 2, i32* [[TMP30]], align 4
// CHECK0-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT:    store i32 3, i32* [[TMP31]], align 4
// CHECK0-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK0-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK0-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK0-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK0-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT:    store i8** null, i8*** [[TMP36]], align 8
// CHECK0-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT:    store i8** null, i8*** [[TMP37]], align 8
// CHECK0-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT:    store i64 0, i64* [[TMP38]], align 8
// CHECK0-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT:    store i64 0, i64* [[TMP39]], align 8
// CHECK0-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK0-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK0-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT:    store i32 0, i32* [[TMP42]], align 4
// CHECK0-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK0-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0:       omp_offload.failed:
// CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0:       omp_offload.cont:
// CHECK0-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK0-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK0-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK0-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK0-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK0-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK0-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK0-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK0-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK0-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK0-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK0-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK0-NEXT:    store i8* null, i8** [[TMP55]], align 8
// CHECK0-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK0-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK0-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK0-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK0-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK0-NEXT:    store i8* null, i8** [[TMP60]], align 8
// CHECK0-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK0-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK0-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK0-NEXT:    store i8* null, i8** [[TMP65]], align 8
// CHECK0-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK0-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK0-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
// CHECK0-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK0-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK0-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
// CHECK0-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK0-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK0-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK0-NEXT:    store i8* null, i8** [[TMP71]], align 8
// CHECK0-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK0-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK0-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK0-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK0-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK0-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK0-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK0-NEXT:    store i8* null, i8** [[TMP76]], align 8
// CHECK0-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK0-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK0-NEXT:    store i64 5, i64* [[TMP78]], align 8
// CHECK0-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK0-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK0-NEXT:    store i64 5, i64* [[TMP80]], align 8
// CHECK0-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK0-NEXT:    store i8* null, i8** [[TMP81]], align 8
// CHECK0-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK0-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK0-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK0-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK0-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK0-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK0-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK0-NEXT:    store i8* null, i8** [[TMP86]], align 8
// CHECK0-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK0-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK0-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK0-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK0-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK0-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK0-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK0-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK0-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK0-NEXT:    store i8* null, i8** [[TMP92]], align 8
// CHECK0-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK0-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK0-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK0-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK0-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK0-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK0-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK0-NEXT:    store i8* null, i8** [[TMP97]], align 8
// CHECK0-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK0-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK0-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK0-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK0-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK0-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK0-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK0-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK0-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK0-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK0-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK0-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK0-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK0-NEXT:    store i8** null, i8*** [[TMP107]], align 8
// CHECK0-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK0-NEXT:    store i8** null, i8*** [[TMP108]], align 8
// CHECK0-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK0-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK0-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK0-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK0-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK0-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK0-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK0-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK0-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK0-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK0-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK0-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK0:       omp_offload.failed8:
// CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
// CHECK0:       omp_offload.cont9:
// CHECK0-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK0-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK0-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK0-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK0-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK0-NEXT:    store i8* null, i8** [[TMP121]], align 8
// CHECK0-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK0-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK0-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK0-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK0-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK0-NEXT:    store i8* null, i8** [[TMP126]], align 8
// CHECK0-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK0-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK0-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK0-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK0-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK0-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK0-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK0-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK0-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK0-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK0-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK0-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK0-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK0-NEXT:    store i8** null, i8*** [[TMP135]], align 8
// CHECK0-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK0-NEXT:    store i8** null, i8*** [[TMP136]], align 8
// CHECK0-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK0-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK0-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK0-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK0-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK0-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK0-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK0-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK0-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK0-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK0-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK0-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK0:       omp_offload.failed14:
// CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
// CHECK0:       omp_offload.cont15:
// CHECK0-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK0-NEXT:    ret i32 [[TMP144]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK0-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK0-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK0-NEXT:    ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK0-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK0-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK0-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK0-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK0-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK0-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK0-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK0-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK0-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK0-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK0-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK0-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK0-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK0-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK0-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK0-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK0-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK0-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK0-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK0-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK0-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK0-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK0-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK0-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK0-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK0-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK0-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK0-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK0-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK0-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK0-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK0-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK0-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK0-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK0-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK0-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK0-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK0-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK0-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK0-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK0-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK0-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK0-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK0-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK0-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK0-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK0-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK0-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK0-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK0-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK0-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK0-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK0-NEXT:    store i64 1, i64* [[X]], align 8
// CHECK0-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK0-NEXT:    store i8 1, i8* [[Y]], align 8
// CHECK0-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK0-NEXT:    ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK0-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK0-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK0-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK0-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK0-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK0-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK0-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK0-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK0-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK0-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK0-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK0-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK0-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK0-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK0-NEXT:    ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK0-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK0-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK0-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK0-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK0-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK0-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK0-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK0-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK0-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    ret i32 [[TMP9]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK0-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK0-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK0-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK0-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK0-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK0-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK0-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK0-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK0-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK0-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK0-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK0-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK0-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK0-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK0-NEXT:    store double* [[A]], double** [[TMP13]], align 8
// CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT:    store i8* null, i8** [[TMP14]], align 8
// CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK0-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK0-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT:    store i8* null, i8** [[TMP19]], align 8
// CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK0-NEXT:    store i64 2, i64* [[TMP21]], align 8
// CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK0-NEXT:    store i64 2, i64* [[TMP23]], align 8
// CHECK0-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-NEXT:    store i8* null, i8** [[TMP24]], align 8
// CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK0-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK0-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK0-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK0-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK0-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK0-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK0-NEXT:    store i8* null, i8** [[TMP29]], align 8
// CHECK0-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK0-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK0-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK0-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK0-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK0-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK0-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK0-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK0-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK0-NEXT:    store i8* null, i8** [[TMP35]], align 8
// CHECK0-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK0-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK0-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK0-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK0-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK0-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK0-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT:    store i8** null, i8*** [[TMP45]], align 8
// CHECK0-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT:    store i8** null, i8*** [[TMP46]], align 8
// CHECK0-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK0-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK0-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK0-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK0-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK0-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK0-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0:       omp_offload.failed:
// CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0:       omp_offload.cont:
// CHECK0-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK0-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK0-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK0-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK0-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK0-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK0-NEXT:    ret i32 [[ADD4]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK0-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK0-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK0-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK0-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK0-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK0-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK0-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT:    store i8* null, i8** [[TMP8]], align 8
// CHECK0-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK0-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK0-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK0-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT:    store i8* null, i8** [[TMP13]], align 8
// CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK0-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK0-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK0-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK0-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK0-NEXT:    store i8* null, i8** [[TMP18]], align 8
// CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK0-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK0-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK0-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK0-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT:    store i8** null, i8*** [[TMP27]], align 8
// CHECK0-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT:    store i8** null, i8*** [[TMP28]], align 8
// CHECK0-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK0-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK0-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK0-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK0-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK0-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK0-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0:       omp_offload.failed:
// CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0:       omp_offload.cont:
// CHECK0-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    ret i32 [[TMP36]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK0-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK0-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK0-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK0-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK0-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK0-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK0-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK0-NEXT:    store i8* null, i8** [[TMP6]], align 8
// CHECK0-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK0-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK0-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK0-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK0-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK0-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK0-NEXT:    store i8* null, i8** [[TMP11]], align 8
// CHECK0-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK0-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK0-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK0-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK0-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK0-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK0-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK0-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK0-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK0-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK0-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK0-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK0-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK0-NEXT:    store i8** null, i8*** [[TMP20]], align 8
// CHECK0-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK0-NEXT:    store i8** null, i8*** [[TMP21]], align 8
// CHECK0-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK0-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK0-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK0-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK0-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK0-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK0-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK0-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK0-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK0-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK0-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK0-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK0-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK0:       omp_offload.failed:
// CHECK0-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK0-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK0:       omp_offload.cont:
// CHECK0-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK0-NEXT:    ret i32 [[TMP29]]
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK0-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK0-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK0-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK0-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK0-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK0-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK0-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK0-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK0-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK0-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK0-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK0-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK0-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK0-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK0-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK0-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK0-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK0-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK0-NEXT:    store double [[ADD]], double* [[A]], align 8
// CHECK0-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK0-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK0-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK0-NEXT:    store double [[INC]], double* [[A5]], align 8
// CHECK0-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK0-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK0-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK0-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK0-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK0-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK0-NEXT:    ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK0-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK0-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK0-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK0-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK0-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK0-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK0-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK0-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK0-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK0-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK0-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK0-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK0-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK0-NEXT:    ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK0-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK0-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK0-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK0-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK0-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK0-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK0-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK0-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK0-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK0-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK0-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK0-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK0-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK0-NEXT:    ret void
//
//
// CHECK0-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK0-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK0-NEXT:  entry:
// CHECK0-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK0-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK1-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[GA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT:    [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
// CHECK1-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// CHECK1-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK1-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK1-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// CHECK1-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// CHECK1-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// CHECK1-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-NEXT:    store i32 [[TMP8]], i32* [[CONV]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* @ga, align 4
// CHECK1-NEXT:    [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
// CHECK1-NEXT:    store i32 [[TMP11]], i32* [[CONV2]], align 4
// CHECK1-NEXT:    [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP14]], align 8
// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP16]], align 8
// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP17]], align 8
// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK1-NEXT:    store i32* [[TMP10]], i32** [[TMP19]], align 8
// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
// CHECK1-NEXT:    store i32* [[TMP10]], i32** [[TMP21]], align 8
// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT:    store i8* null, i8** [[TMP22]], align 8
// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
// CHECK1-NEXT:    store i64 [[TMP12]], i64* [[TMP24]], align 8
// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-NEXT:    store i64 [[TMP12]], i64* [[TMP26]], align 8
// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT:    store i32 2, i32* [[TMP30]], align 4
// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT:    store i32 3, i32* [[TMP31]], align 4
// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT:    store i8** [[TMP29]], i8*** [[TMP33]], align 8
// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT:    store i8** null, i8*** [[TMP36]], align 8
// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT:    store i8** null, i8*** [[TMP37]], align 8
// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT:    store i64 0, i64* [[TMP38]], align 8
// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT:    store i64 0, i64* [[TMP39]], align 8
// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT:    store i32 0, i32* [[TMP42]], align 4
// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1:       omp_offload.failed:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1:       omp_offload.cont:
// CHECK1-NEXT:    [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
// CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
// CHECK1-NEXT:    store i16 [[TMP45]], i16* [[CONV3]], align 2
// CHECK1-NEXT:    [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
// CHECK1-NEXT:    [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-NEXT:    [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
// CHECK1-NEXT:    [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
// CHECK1-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
// CHECK1-NEXT:    store i64 [[TMP46]], i64* [[TMP52]], align 8
// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
// CHECK1-NEXT:    store i64 [[TMP46]], i64* [[TMP54]], align 8
// CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP55]], align 8
// CHECK1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
// CHECK1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
// CHECK1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
// CHECK1-NEXT:    store i8* null, i8** [[TMP60]], align 8
// CHECK1-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[TMP62]], align 8
// CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[TMP64]], align 8
// CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
// CHECK1-NEXT:    store i8* null, i8** [[TMP65]], align 8
// CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
// CHECK1-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP67]], align 8
// CHECK1-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
// CHECK1-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP69]], align 8
// CHECK1-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK1-NEXT:    store i64 [[TMP47]], i64* [[TMP70]], align 8
// CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
// CHECK1-NEXT:    store i8* null, i8** [[TMP71]], align 8
// CHECK1-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
// CHECK1-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
// CHECK1-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
// CHECK1-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
// CHECK1-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
// CHECK1-NEXT:    store i8* null, i8** [[TMP76]], align 8
// CHECK1-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
// CHECK1-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
// CHECK1-NEXT:    store i64 5, i64* [[TMP78]], align 8
// CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
// CHECK1-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
// CHECK1-NEXT:    store i64 5, i64* [[TMP80]], align 8
// CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
// CHECK1-NEXT:    store i8* null, i8** [[TMP81]], align 8
// CHECK1-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
// CHECK1-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
// CHECK1-NEXT:    store i64 [[TMP4]], i64* [[TMP83]], align 8
// CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
// CHECK1-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
// CHECK1-NEXT:    store i64 [[TMP4]], i64* [[TMP85]], align 8
// CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
// CHECK1-NEXT:    store i8* null, i8** [[TMP86]], align 8
// CHECK1-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
// CHECK1-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 8
// CHECK1-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
// CHECK1-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 8
// CHECK1-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK1-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 8
// CHECK1-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
// CHECK1-NEXT:    store i8* null, i8** [[TMP92]], align 8
// CHECK1-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
// CHECK1-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
// CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
// CHECK1-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
// CHECK1-NEXT:    store i8* null, i8** [[TMP97]], align 8
// CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
// CHECK1-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
// CHECK1-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
// CHECK1-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 8
// CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
// CHECK1-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 8
// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
// CHECK1-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 8
// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
// CHECK1-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
// CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
// CHECK1-NEXT:    store i8** null, i8*** [[TMP107]], align 8
// CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
// CHECK1-NEXT:    store i8** null, i8*** [[TMP108]], align 8
// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
// CHECK1-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
// CHECK1-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
// CHECK1-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
// CHECK1-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK1-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
// CHECK1-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK1-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
// CHECK1:       omp_offload.failed8:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
// CHECK1:       omp_offload.cont9:
// CHECK1-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK1-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 8
// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK1-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 8
// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP121]], align 8
// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK1-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK1-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
// CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
// CHECK1-NEXT:    store i8* null, i8** [[TMP126]], align 8
// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
// CHECK1-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
// CHECK1-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
// CHECK1-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 8
// CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
// CHECK1-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 8
// CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
// CHECK1-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
// CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
// CHECK1-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
// CHECK1-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
// CHECK1-NEXT:    store i8** null, i8*** [[TMP135]], align 8
// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
// CHECK1-NEXT:    store i8** null, i8*** [[TMP136]], align 8
// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
// CHECK1-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK1-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
// CHECK1-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK1-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
// CHECK1-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
// CHECK1-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK1-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
// CHECK1-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK1-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
// CHECK1:       omp_offload.failed14:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
// CHECK1:       omp_offload.cont15:
// CHECK1-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK1-NEXT:    ret i32 [[TMP144]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK1-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK1-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// CHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// CHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// CHECK1-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// CHECK1-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK1-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK1-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// CHECK1-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// CHECK1-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// CHECK1-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// CHECK1-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// CHECK1-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK1-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// CHECK1-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// CHECK1-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// CHECK1-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// CHECK1-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// CHECK1-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK1-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// CHECK1-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// CHECK1-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK1-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK1-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// CHECK1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// CHECK1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// CHECK1-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK1-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// CHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// CHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// CHECK1-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK1-NEXT:    store i64 1, i64* [[X]], align 8
// CHECK1-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK1-NEXT:    store i8 1, i8* [[Y]], align 8
// CHECK1-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK1-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// CHECK1-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK1-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
// CHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK1-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
// CHECK1-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
// CHECK1-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
// CHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK1-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 8
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// CHECK1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// CHECK1-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK1-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK1-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK1-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    ret i32 [[TMP9]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK1-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK1-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// CHECK1-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
// CHECK1-NEXT:    store i32 [[TMP5]], i32* [[CONV]], align 4
// CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
// CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP14]], align 8
// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
// CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP16]], align 8
// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
// CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP18]], align 8
// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
// CHECK1-NEXT:    store i64 2, i64* [[TMP21]], align 8
// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT:    store i8* null, i8** [[TMP24]], align 8
// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK1-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP26]], align 8
// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK1-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP28]], align 8
// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
// CHECK1-NEXT:    store i8* null, i8** [[TMP29]], align 8
// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 8
// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK1-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 8
// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK1-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 8
// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
// CHECK1-NEXT:    store i8* null, i8** [[TMP35]], align 8
// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 8
// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 8
// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 8
// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT:    store i8** null, i8*** [[TMP45]], align 8
// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT:    store i8** null, i8*** [[TMP46]], align 8
// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK1-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK1-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1:       omp_offload.failed:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1:       omp_offload.cont:
// CHECK1-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
// CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK1-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
// CHECK1-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK1-NEXT:    ret i32 [[ADD4]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[AAA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
// CHECK1-NEXT:    store i8 [[TMP2]], i8* [[CONV1]], align 1
// CHECK1-NEXT:    [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[TMP7]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP8]], align 8
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
// CHECK1-NEXT:    store i64 [[TMP3]], i64* [[TMP10]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
// CHECK1-NEXT:    store i64 [[TMP3]], i64* [[TMP12]], align 8
// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT:    store i8* null, i8** [[TMP13]], align 8
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
// CHECK1-NEXT:    store i8* null, i8** [[TMP18]], align 8
// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 8
// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT:    store i8** null, i8*** [[TMP27]], align 8
// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT:    store i8** null, i8*** [[TMP28]], align 8
// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK1-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK1-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1:       omp_offload.failed:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1:       omp_offload.cont:
// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    ret i32 [[TMP36]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
// CHECK1-NEXT:    store i32 [[TMP0]], i32* [[CONV]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[TMP3]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[TMP5]], align 8
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
// CHECK1-NEXT:    store i8* null, i8** [[TMP11]], align 8
// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK1-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK1-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK1-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 8
// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK1-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 8
// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK1-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK1-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK1-NEXT:    store i8** null, i8*** [[TMP20]], align 8
// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK1-NEXT:    store i8** null, i8*** [[TMP21]], align 8
// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK1-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK1-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK1-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK1-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK1-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK1-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK1-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1:       omp_offload.failed:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1:       omp_offload.cont:
// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK1-NEXT:    ret i32 [[TMP29]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// CHECK1-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK1-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// CHECK1-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// CHECK1-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// CHECK1-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK1-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK1-NEXT:    store double [[ADD]], double* [[A]], align 8
// CHECK1-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// CHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK1-NEXT:    store double [[INC]], double* [[A5]], align 8
// CHECK1-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// CHECK1-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// CHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// CHECK1-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// CHECK1-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// CHECK1-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK1-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// CHECK1-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK1-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// CHECK1-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK1-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK1-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// CHECK1-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK2-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK2-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK2-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK2-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK2-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK2-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK2-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// CHECK2-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK2-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK2-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK2-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK2-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 4
// CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK2-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK2-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 4
// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK2-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK2-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 4
// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT:    store i32 2, i32* [[TMP28]], align 4
// CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT:    store i32 3, i32* [[TMP29]], align 4
// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT:    store i8** null, i8*** [[TMP34]], align 4
// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT:    store i8** null, i8*** [[TMP35]], align 4
// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT:    store i64 0, i64* [[TMP36]], align 8
// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT:    store i64 0, i64* [[TMP37]], align 8
// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT:    store i32 0, i32* [[TMP40]], align 4
// CHECK2-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK2-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2:       omp_offload.failed:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2:       omp_offload.cont:
// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK2-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK2-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK2-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK2-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK2-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK2-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK2-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK2-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK2-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK2-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK2-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP55]], align 4
// CHECK2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK2-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK2-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK2-NEXT:    store i8* null, i8** [[TMP60]], align 4
// CHECK2-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK2-NEXT:    store i8* null, i8** [[TMP65]], align 4
// CHECK2-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK2-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
// CHECK2-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK2-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
// CHECK2-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK2-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK2-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK2-NEXT:    store i8* null, i8** [[TMP71]], align 4
// CHECK2-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK2-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK2-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK2-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK2-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK2-NEXT:    store i8* null, i8** [[TMP76]], align 4
// CHECK2-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK2-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK2-NEXT:    store i32 5, i32* [[TMP78]], align 4
// CHECK2-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK2-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK2-NEXT:    store i32 5, i32* [[TMP80]], align 4
// CHECK2-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK2-NEXT:    store i8* null, i8** [[TMP81]], align 4
// CHECK2-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK2-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK2-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK2-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK2-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK2-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK2-NEXT:    store i8* null, i8** [[TMP86]], align 4
// CHECK2-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK2-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK2-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK2-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK2-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK2-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK2-NEXT:    store i8* null, i8** [[TMP92]], align 4
// CHECK2-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK2-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK2-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK2-NEXT:    store i8* null, i8** [[TMP97]], align 4
// CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK2-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK2-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK2-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK2-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK2-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK2-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK2-NEXT:    store i8** null, i8*** [[TMP107]], align 4
// CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK2-NEXT:    store i8** null, i8*** [[TMP108]], align 4
// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK2-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK2-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK2-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK2-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK2-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK2-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK2-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK2:       omp_offload.failed6:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
// CHECK2:       omp_offload.cont7:
// CHECK2-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK2-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK2-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP121]], align 4
// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK2-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK2-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK2-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK2-NEXT:    store i8* null, i8** [[TMP126]], align 4
// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK2-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK2-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK2-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK2-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK2-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK2-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK2-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK2-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK2-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK2-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK2-NEXT:    store i8** null, i8*** [[TMP135]], align 4
// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK2-NEXT:    store i8** null, i8*** [[TMP136]], align 4
// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK2-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK2-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK2-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK2-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK2-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK2-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK2-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK2-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK2-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK2-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK2:       omp_offload.failed12:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
// CHECK2:       omp_offload.cont13:
// CHECK2-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK2-NEXT:    ret i32 [[TMP144]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK2-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK2-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK2-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK2-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK2-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK2-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK2-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK2-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK2-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// CHECK2-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK2-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK2-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK2-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK2-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK2-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK2-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK2-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK2-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// CHECK2-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK2-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK2-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK2-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK2-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK2-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK2-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK2-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK2-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK2-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK2-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK2-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK2-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK2-NEXT:    store i64 1, i64* [[X]], align 4
// CHECK2-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK2-NEXT:    store i8 1, i8* [[Y]], align 4
// CHECK2-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK2-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK2-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK2-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK2-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK2-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK2-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK2-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK2-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK2-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK2-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK2-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK2-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK2-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    ret i32 [[TMP9]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK2-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK2-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK2-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 4
// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP14]], align 4
// CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT:    store i8* null, i8** [[TMP19]], align 4
// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK2-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK2-NEXT:    store i32 2, i32* [[TMP23]], align 4
// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT:    store i8* null, i8** [[TMP24]], align 4
// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK2-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK2-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK2-NEXT:    store i8* null, i8** [[TMP29]], align 4
// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK2-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK2-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK2-NEXT:    store i8* null, i8** [[TMP35]], align 4
// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT:    store i8** null, i8*** [[TMP45]], align 4
// CHECK2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT:    store i8** null, i8*** [[TMP46]], align 4
// CHECK2-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK2-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK2-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK2-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK2-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2:       omp_offload.failed:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2:       omp_offload.cont:
// CHECK2-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK2-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK2-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK2-NEXT:    ret i32 [[ADD3]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK2-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK2-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK2-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP8]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT:    store i8* null, i8** [[TMP13]], align 4
// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK2-NEXT:    store i8* null, i8** [[TMP18]], align 4
// CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT:    store i8** null, i8*** [[TMP27]], align 4
// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT:    store i8** null, i8*** [[TMP28]], align 4
// CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK2-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK2-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2:       omp_offload.failed:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2:       omp_offload.cont:
// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    ret i32 [[TMP36]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK2-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP6]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK2-NEXT:    store i8* null, i8** [[TMP11]], align 4
// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK2-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK2-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK2-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK2-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK2-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK2-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK2-NEXT:    store i8** null, i8*** [[TMP20]], align 4
// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK2-NEXT:    store i8** null, i8*** [[TMP21]], align 4
// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK2-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK2-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK2-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK2-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK2-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK2-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK2-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK2-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2:       omp_offload.failed:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2:       omp_offload.cont:
// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK2-NEXT:    ret i32 [[TMP29]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK2-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK2-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK2-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK2-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK2-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK2-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK2-NEXT:    store double [[ADD]], double* [[A]], align 4
// CHECK2-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK2-NEXT:    store double [[INC]], double* [[A4]], align 4
// CHECK2-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK2-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK2-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK2-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK2-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK2-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK2-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK2-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK2-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z3fooiPd
// CHECK3-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[AA:%.*]] = alloca i16, align 2
// CHECK3-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-NEXT:    [[P:%.*]] = alloca i32*, align 64
// CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[GA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
// CHECK3-NEXT:    [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK3-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-NEXT:    store i16 0, i16* [[AA]], align 2
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// CHECK3-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// CHECK3-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// CHECK3-NEXT:    store i32* [[A]], i32** [[P]], align 64
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    store i32 [[TMP6]], i32* [[A_CASTED]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* @ga, align 4
// CHECK3-NEXT:    store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK3-NEXT:    store i32 [[TMP7]], i32* [[TMP12]], align 4
// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
// CHECK3-NEXT:    store i32 [[TMP7]], i32* [[TMP14]], align 4
// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
// CHECK3-NEXT:    store i32* [[TMP8]], i32** [[TMP17]], align 4
// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
// CHECK3-NEXT:    store i32* [[TMP8]], i32** [[TMP19]], align 4
// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
// CHECK3-NEXT:    store i32 [[TMP10]], i32* [[TMP22]], align 4
// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
// CHECK3-NEXT:    store i32 [[TMP10]], i32* [[TMP24]], align 4
// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT:    store i32 2, i32* [[TMP28]], align 4
// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT:    store i32 3, i32* [[TMP29]], align 4
// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT:    store i8** [[TMP26]], i8*** [[TMP30]], align 4
// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT:    store i8** null, i8*** [[TMP34]], align 4
// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT:    store i8** null, i8*** [[TMP35]], align 4
// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT:    store i64 0, i64* [[TMP36]], align 8
// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT:    store i64 0, i64* [[TMP37]], align 8
// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT:    store i32 0, i32* [[TMP40]], align 4
// CHECK3-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
// CHECK3-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3:       omp_offload.failed:
// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3:       omp_offload.cont:
// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
// CHECK3-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
// CHECK3-NEXT:    store i16 [[TMP43]], i16* [[CONV]], align 2
// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
// CHECK3-NEXT:    [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
// CHECK3-NEXT:    [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
// CHECK3-NEXT:    [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
// CHECK3-NEXT:    [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
// CHECK3-NEXT:    [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
// CHECK3-NEXT:    [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
// CHECK3-NEXT:    store i32 [[TMP44]], i32* [[TMP52]], align 4
// CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
// CHECK3-NEXT:    store i32 [[TMP44]], i32* [[TMP54]], align 4
// CHECK3-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
// CHECK3-NEXT:    store i8* null, i8** [[TMP55]], align 4
// CHECK3-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
// CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
// CHECK3-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
// CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
// CHECK3-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
// CHECK3-NEXT:    store i8* null, i8** [[TMP60]], align 4
// CHECK3-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[TMP62]], align 4
// CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[TMP64]], align 4
// CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
// CHECK3-NEXT:    store i8* null, i8** [[TMP65]], align 4
// CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
// CHECK3-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP67]], align 4
// CHECK3-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
// CHECK3-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP69]], align 4
// CHECK3-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
// CHECK3-NEXT:    store i64 [[TMP46]], i64* [[TMP70]], align 4
// CHECK3-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
// CHECK3-NEXT:    store i8* null, i8** [[TMP71]], align 4
// CHECK3-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
// CHECK3-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
// CHECK3-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
// CHECK3-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
// CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
// CHECK3-NEXT:    store i8* null, i8** [[TMP76]], align 4
// CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
// CHECK3-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
// CHECK3-NEXT:    store i32 5, i32* [[TMP78]], align 4
// CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
// CHECK3-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
// CHECK3-NEXT:    store i32 5, i32* [[TMP80]], align 4
// CHECK3-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
// CHECK3-NEXT:    store i8* null, i8** [[TMP81]], align 4
// CHECK3-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
// CHECK3-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[TMP83]], align 4
// CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
// CHECK3-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[TMP85]], align 4
// CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
// CHECK3-NEXT:    store i8* null, i8** [[TMP86]], align 4
// CHECK3-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
// CHECK3-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP88]], align 4
// CHECK3-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
// CHECK3-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP90]], align 4
// CHECK3-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
// CHECK3-NEXT:    store i64 [[TMP49]], i64* [[TMP91]], align 4
// CHECK3-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
// CHECK3-NEXT:    store i8* null, i8** [[TMP92]], align 4
// CHECK3-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
// CHECK3-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
// CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
// CHECK3-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
// CHECK3-NEXT:    store i8* null, i8** [[TMP97]], align 4
// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
// CHECK3-NEXT:    store i32 2, i32* [[TMP101]], align 4
// CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
// CHECK3-NEXT:    store i32 9, i32* [[TMP102]], align 4
// CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
// CHECK3-NEXT:    store i8** [[TMP98]], i8*** [[TMP103]], align 4
// CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
// CHECK3-NEXT:    store i8** [[TMP99]], i8*** [[TMP104]], align 4
// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
// CHECK3-NEXT:    store i64* [[TMP100]], i64** [[TMP105]], align 4
// CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
// CHECK3-NEXT:    store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
// CHECK3-NEXT:    store i8** null, i8*** [[TMP107]], align 4
// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
// CHECK3-NEXT:    store i8** null, i8*** [[TMP108]], align 4
// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
// CHECK3-NEXT:    store i64 0, i64* [[TMP109]], align 8
// CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
// CHECK3-NEXT:    store i64 0, i64* [[TMP110]], align 8
// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
// CHECK3-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
// CHECK3-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
// CHECK3-NEXT:    store i32 0, i32* [[TMP113]], align 4
// CHECK3-NEXT:    [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
// CHECK3-NEXT:    [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
// CHECK3-NEXT:    br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
// CHECK3:       omp_offload.failed6:
// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
// CHECK3:       omp_offload.cont7:
// CHECK3-NEXT:    [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
// CHECK3-NEXT:    store double* [[TMP116]], double** [[TMP118]], align 4
// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
// CHECK3-NEXT:    store double* [[TMP116]], double** [[TMP120]], align 4
// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
// CHECK3-NEXT:    store i8* null, i8** [[TMP121]], align 4
// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
// CHECK3-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
// CHECK3-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
// CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
// CHECK3-NEXT:    store i8* null, i8** [[TMP126]], align 4
// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
// CHECK3-NEXT:    store i32 2, i32* [[TMP129]], align 4
// CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
// CHECK3-NEXT:    store i32 2, i32* [[TMP130]], align 4
// CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
// CHECK3-NEXT:    store i8** [[TMP127]], i8*** [[TMP131]], align 4
// CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
// CHECK3-NEXT:    store i8** [[TMP128]], i8*** [[TMP132]], align 4
// CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
// CHECK3-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
// CHECK3-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
// CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
// CHECK3-NEXT:    store i8** null, i8*** [[TMP135]], align 4
// CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
// CHECK3-NEXT:    store i8** null, i8*** [[TMP136]], align 4
// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
// CHECK3-NEXT:    store i64 0, i64* [[TMP137]], align 8
// CHECK3-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
// CHECK3-NEXT:    store i64 0, i64* [[TMP138]], align 8
// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
// CHECK3-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
// CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
// CHECK3-NEXT:    store i32 0, i32* [[TMP141]], align 4
// CHECK3-NEXT:    [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
// CHECK3-NEXT:    [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
// CHECK3-NEXT:    br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
// CHECK3:       omp_offload.failed12:
// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT13]]
// CHECK3:       omp_offload.cont13:
// CHECK3-NEXT:    [[TMP144:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP145]])
// CHECK3-NEXT:    ret i32 [[TMP144]]
//
//
// Annotation (hand-written, not emitted by the check generator):
// Expected IR for the outlined function whose mangled name ends in
// __Z3fooiPd_l63. It receives an i32, an i32* and a second i32, spills each
// argument to its own stack slot and returns; no other work is expected.
// The ATTR2 attribute-group variable is bound by the SAME line below and is
// reused by the l70 and l111 function checks further down.
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// CHECK3-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// CHECK3-NEXT:    ret void
//
//
// Annotation (hand-written, not emitted by the check generator):
// Expected IR for the outlined function whose mangled name ends in
// __Z3fooiPd_l70. Its nine parameters are spilled to stack slots and
// reloaded. The [10 x float] array, the float buffer, the
// [5 x [10 x double]] array, the double buffer and the %struct.TT argument
// are each copied with llvm.memcpy into function-local storage, and the
// element/field stores near the end target those local copies rather than
// the incoming pointers.
// NOTE(review): the local copies look like firstprivate captures -- confirm
// against the pragma at line 70 of the test source.
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// CHECK3-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// CHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// CHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// CHECK3-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// CHECK3-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK3-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK3-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// CHECK3-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// CHECK3-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// CHECK3-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// CHECK3-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// CHECK3-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// The stack pointer is saved here, before the dynamically sized allocas, and
// restored immediately before the final ret.
// CHECK3-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// CHECK3-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// CHECK3-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// CHECK3-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// CHECK3-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// CHECK3-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// CHECK3-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// CHECK3-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// CHECK3-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// CHECK3-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// The first argument is accessed as an i16 through a bitcast of its i32
// slot: load, sign-extend, add 1, truncate, store back.
// CHECK3-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// CHECK3-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// CHECK3-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// CHECK3-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// CHECK3-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// CHECK3-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// CHECK3-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// CHECK3-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// CHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// CHECK3-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// CHECK3-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// CHECK3-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// CHECK3-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// CHECK3-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// CHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// CHECK3-NEXT:    store i64 1, i64* [[X]], align 4
// CHECK3-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// CHECK3-NEXT:    store i8 1, i8* [[Y]], align 4
// CHECK3-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// CHECK3-NEXT:    ret void
//
//
// Annotation (hand-written, not emitted by the check generator):
// Expected IR for the outlined function whose mangled name ends in
// __Z3fooiPd_l111. The %struct.TT.0 argument is copied (8 bytes) into a
// local; field 0 of that copy is loaded as i32, converted to double and
// stored to element 0 of ptr, after which element 0 is reloaded and
// incremented by 1.0. Every access here, including the double loads and
// stores, is expected with 4-byte alignment.
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// CHECK3-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// CHECK3-NEXT:    [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// CHECK3-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
// CHECK3-NEXT:    [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
// CHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[X]], align 4
// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK3-NEXT:    [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
// CHECK3-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
// CHECK3-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
// CHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// CHECK3-NEXT:    store double [[INC]], double* [[ARRAYIDX2]], align 4
// CHECK3-NEXT:    ret void
//
//
// Annotation (hand-written, not emitted by the check generator):
// Expected IR for _Z3bariPd, i.e. bar(int, double*). A local i32 accumulator
// is zero-initialised; then the results of four calls are added to it in
// this order: _Z3fooiPd (passed n and ptr), _ZN2S12r1Ei (passed a local
// struct.S1 object and n), _ZL7fstatici (passed n) and _Z9ftemplateIiET_i
// (passed n). n is reloaded from its stack slot before every call, and the
// final accumulator value is returned.
// CHECK3-LABEL: define {{[^@]+}}@_Z3bariPd
// CHECK3-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// CHECK3-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// CHECK3-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// CHECK3-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// CHECK3-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// CHECK3-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    ret i32 [[TMP9]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// CHECK3-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[B:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// CHECK3-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
// CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
// CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
// CHECK3-NEXT:    [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT:    store i8* null, i8** [[TMP14]], align 4
// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
// CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP16]], align 4
// CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
// CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP18]], align 4
// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT:    store i8* null, i8** [[TMP19]], align 4
// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
// CHECK3-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT:    store i8* null, i8** [[TMP24]], align 4
// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
// CHECK3-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP26]], align 4
// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
// CHECK3-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
// CHECK3-NEXT:    store i8* null, i8** [[TMP29]], align 4
// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP31]], align 4
// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
// CHECK3-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP33]], align 4
// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
// CHECK3-NEXT:    store i64 [[TMP8]], i64* [[TMP34]], align 4
// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
// CHECK3-NEXT:    store i8* null, i8** [[TMP35]], align 4
// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT:    store i32 2, i32* [[TMP39]], align 4
// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT:    store i32 5, i32* [[TMP40]], align 4
// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT:    store i8** [[TMP36]], i8*** [[TMP41]], align 4
// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT:    store i8** [[TMP37]], i8*** [[TMP42]], align 4
// CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT:    store i64* [[TMP38]], i64** [[TMP43]], align 4
// CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
// CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT:    store i8** null, i8*** [[TMP45]], align 4
// CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT:    store i8** null, i8*** [[TMP46]], align 4
// CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT:    store i64 0, i64* [[TMP47]], align 8
// CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT:    store i64 0, i64* [[TMP48]], align 8
// CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
// CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT:    store i32 0, i32* [[TMP51]], align 4
// CHECK3-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
// CHECK3-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3:       omp_offload.failed:
// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3:       omp_offload.cont:
// CHECK3-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK3-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP55]] to i32
// CHECK3-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
// CHECK3-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
// CHECK3-NEXT:    ret i32 [[ADD3]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici
// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// CHECK3-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[AAA_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-NEXT:    store i8 0, i8* [[AAA]], align 1
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
// CHECK3-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
// CHECK3-NEXT:    store i8 [[TMP2]], i8* [[CONV]], align 1
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP7]], align 4
// CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT:    store i8* null, i8** [[TMP8]], align 4
// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT:    store i8* null, i8** [[TMP13]], align 4
// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
// CHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
// CHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
// CHECK3-NEXT:    store i8* null, i8** [[TMP18]], align 4
// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT:    store i32 2, i32* [[TMP21]], align 4
// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT:    store i32 3, i32* [[TMP22]], align 4
// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT:    store i8** [[TMP19]], i8*** [[TMP23]], align 4
// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 4
// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT:    store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT:    store i8** null, i8*** [[TMP27]], align 4
// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT:    store i8** null, i8*** [[TMP28]], align 4
// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT:    store i64 0, i64* [[TMP29]], align 8
// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT:    store i64 0, i64* [[TMP30]], align 8
// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT:    store i32 0, i32* [[TMP33]], align 4
// CHECK3-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
// CHECK3-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3:       omp_offload.failed:
// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3:       omp_offload.cont:
// CHECK3-NEXT:    [[TMP36:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    ret i32 [[TMP36]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
// CHECK3-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[A]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[A_CASTED]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP3]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK3-NEXT:    store i8* null, i8** [[TMP6]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
// CHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
// CHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
// CHECK3-NEXT:    store i8* null, i8** [[TMP11]], align 4
// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
// CHECK3-NEXT:    store i32 2, i32* [[TMP14]], align 4
// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
// CHECK3-NEXT:    store i32 2, i32* [[TMP15]], align 4
// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
// CHECK3-NEXT:    store i8** [[TMP12]], i8*** [[TMP16]], align 4
// CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
// CHECK3-NEXT:    store i8** [[TMP13]], i8*** [[TMP17]], align 4
// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
// CHECK3-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
// CHECK3-NEXT:    store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
// CHECK3-NEXT:    store i8** null, i8*** [[TMP20]], align 4
// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
// CHECK3-NEXT:    store i8** null, i8*** [[TMP21]], align 4
// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
// CHECK3-NEXT:    store i64 0, i64* [[TMP22]], align 8
// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
// CHECK3-NEXT:    store i64 0, i64* [[TMP23]], align 8
// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
// CHECK3-NEXT:    store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
// CHECK3-NEXT:    store i32 0, i32* [[TMP26]], align 4
// CHECK3-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
// CHECK3-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK3-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK3:       omp_offload.failed:
// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK3:       omp_offload.cont:
// CHECK3-NEXT:    [[TMP29:%.*]] = load i32, i32* [[A]], align 4
// CHECK3-NEXT:    ret i32 [[TMP29]]
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// CHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// CHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// CHECK3-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// CHECK3-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// CHECK3-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// CHECK3-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// CHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// CHECK3-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK3-NEXT:    store double [[ADD]], double* [[A]], align 4
// CHECK3-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// CHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// CHECK3-NEXT:    store double [[INC]], double* [[A4]], align 4
// CHECK3-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// CHECK3-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// CHECK3-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// CHECK3-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// CHECK3-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// CHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// CHECK3-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK3-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// CHECK3-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// CHECK3-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// CHECK3-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK3-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// CHECK3-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// CHECK3-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// CHECK3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// CHECK3-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK3-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// CHECK3-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK3-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT:    ret void
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY0-NEXT:  entry:
// SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY0-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY0-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY0-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY0-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY0-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY0-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY0-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY0-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY0-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY0-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// SIMD-ONLY0-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// SIMD-ONLY0-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// SIMD-ONLY0-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY0-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY0-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2
// SIMD-ONLY0-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3
// SIMD-ONLY0-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY0-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1
// SIMD-ONLY0-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY0-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY0-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY0-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3
// SIMD-ONLY0-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY0-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY0-NEXT:    store i64 1, i64* [[X8]], align 8
// SIMD-ONLY0-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY0-NEXT:    store i8 1, i8* [[Y9]], align 8
// SIMD-ONLY0-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0
// SIMD-ONLY0-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 8
// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0
// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8
// SIMD-ONLY0-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY0-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 8
// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT:    call void @llvm.stackrestore(i8* [[TMP15]])
// SIMD-ONLY0-NEXT:    ret i32 [[TMP14]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY0-NEXT:  entry:
// SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY0-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY0-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY0-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY0-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY0-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY0-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY0-NEXT:  entry:
// SIMD-ONLY0-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY0-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY0-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY0-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY0-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY0-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY0-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY0-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY0-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY0-NEXT:    store double [[ADD2]], double* [[A]], align 8
// SIMD-ONLY0-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load double, double* [[A3]], align 8
// SIMD-ONLY0-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY0-NEXT:    store double [[INC]], double* [[A3]], align 8
// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]]
// SIMD-ONLY0-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// SIMD-ONLY0-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY0-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]]
// SIMD-ONLY0-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1
// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY0-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY0-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY0-NEXT:    call void @llvm.stackrestore(i8* [[TMP11]])
// SIMD-ONLY0-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY0-NEXT:  entry:
// SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY0-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY0-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY0-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY0-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY0-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY0-NEXT:  entry:
// SIMD-ONLY0-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY0-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY0-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY0-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY0-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY0-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY0-NEXT:    ret i32 [[TMP2]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY01-NEXT:  entry:
// SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY01-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY01-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY01-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY01-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY01-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY01-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY01-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY01-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY01-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY01-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY01-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY01-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// SIMD-ONLY01-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// SIMD-ONLY01-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// SIMD-ONLY01-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY01-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY01-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY01-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2
// SIMD-ONLY01-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3
// SIMD-ONLY01-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY01-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1
// SIMD-ONLY01-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY01-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY01-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY01-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3
// SIMD-ONLY01-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY01-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY01-NEXT:    store i64 1, i64* [[X8]], align 8
// SIMD-ONLY01-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY01-NEXT:    store i8 1, i8* [[Y9]], align 8
// SIMD-ONLY01-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY01-NEXT:    [[TMP10:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY01-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY01-NEXT:    [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0
// SIMD-ONLY01-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 8
// SIMD-ONLY01-NEXT:    [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0
// SIMD-ONLY01-NEXT:    [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8
// SIMD-ONLY01-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY01-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 8
// SIMD-ONLY01-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT:    call void @llvm.stackrestore(i8* [[TMP15]])
// SIMD-ONLY01-NEXT:    ret i32 [[TMP14]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY01-NEXT:  entry:
// SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY01-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY01-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY01-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY01-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY01-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY01-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY01-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY01-NEXT:  entry:
// SIMD-ONLY01-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY01-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY01-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY01-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY01-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY01-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY01-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY01-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY01-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY01-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY01-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY01-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY01-NEXT:    store double [[ADD2]], double* [[A]], align 8
// SIMD-ONLY01-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY01-NEXT:    [[TMP6:%.*]] = load double, double* [[A3]], align 8
// SIMD-ONLY01-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY01-NEXT:    store double [[INC]], double* [[A3]], align 8
// SIMD-ONLY01-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY01-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]]
// SIMD-ONLY01-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// SIMD-ONLY01-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY01-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY01-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]]
// SIMD-ONLY01-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1
// SIMD-ONLY01-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY01-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY01-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY01-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY01-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY01-NEXT:    call void @llvm.stackrestore(i8* [[TMP11]])
// SIMD-ONLY01-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY01-NEXT:  entry:
// SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY01-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY01-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY01-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY01-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY01-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY01-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY01-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY01-NEXT:  entry:
// SIMD-ONLY01-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY01-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY01-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY01-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY01-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY01-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY01-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY01-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY01-NEXT:    ret i32 [[TMP2]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY02-NEXT:  entry:
// SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY02-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY02-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY02-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY02-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY02-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY02-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY02-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY02-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY02-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY02-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// SIMD-ONLY02-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// SIMD-ONLY02-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// SIMD-ONLY02-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY02-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY02-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY02-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2
// SIMD-ONLY02-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3
// SIMD-ONLY02-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY02-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1
// SIMD-ONLY02-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY02-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY02-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY02-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3
// SIMD-ONLY02-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY02-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY02-NEXT:    store i64 1, i64* [[X8]], align 4
// SIMD-ONLY02-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY02-NEXT:    store i8 1, i8* [[Y9]], align 4
// SIMD-ONLY02-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY02-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0
// SIMD-ONLY02-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 4
// SIMD-ONLY02-NEXT:    [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0
// SIMD-ONLY02-NEXT:    [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4
// SIMD-ONLY02-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY02-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 4
// SIMD-ONLY02-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// SIMD-ONLY02-NEXT:    ret i32 [[TMP12]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY02-NEXT:  entry:
// SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY02-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY02-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY02-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY02-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY02-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY02-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY02-NEXT:  entry:
// SIMD-ONLY02-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY02-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY02-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY02-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY02-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY02-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY02-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY02-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY02-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY02-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY02-NEXT:    store double [[ADD2]], double* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY02-NEXT:    [[TMP5:%.*]] = load double, double* [[A3]], align 4
// SIMD-ONLY02-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY02-NEXT:    store double [[INC]], double* [[A3]], align 4
// SIMD-ONLY02-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY02-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]]
// SIMD-ONLY02-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// SIMD-ONLY02-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY02-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY02-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]]
// SIMD-ONLY02-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1
// SIMD-ONLY02-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY02-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY02-NEXT:    [[TMP9:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY02-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY02-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY02-NEXT:    call void @llvm.stackrestore(i8* [[TMP10]])
// SIMD-ONLY02-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY02-NEXT:  entry:
// SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY02-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY02-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY02-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY02-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY02-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY02-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY02-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY02-NEXT:  entry:
// SIMD-ONLY02-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY02-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY02-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY02-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY02-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY02-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY02-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY02-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY02-NEXT:    ret i32 [[TMP2]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY03-NEXT:  entry:
// SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY03-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY03-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY03-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY03-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY03-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY03-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY03-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY03-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY03-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY03-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// SIMD-ONLY03-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// SIMD-ONLY03-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// SIMD-ONLY03-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY03-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY03-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY03-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2
// SIMD-ONLY03-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3
// SIMD-ONLY03-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY03-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1
// SIMD-ONLY03-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY03-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY03-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY03-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3
// SIMD-ONLY03-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY03-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY03-NEXT:    store i64 1, i64* [[X8]], align 4
// SIMD-ONLY03-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY03-NEXT:    store i8 1, i8* [[Y9]], align 4
// SIMD-ONLY03-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY03-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0
// SIMD-ONLY03-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 4
// SIMD-ONLY03-NEXT:    [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0
// SIMD-ONLY03-NEXT:    [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4
// SIMD-ONLY03-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY03-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 4
// SIMD-ONLY03-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// SIMD-ONLY03-NEXT:    ret i32 [[TMP12]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY03-NEXT:  entry:
// SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY03-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY03-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY03-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY03-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY03-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY03-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY03-NEXT:  entry:
// SIMD-ONLY03-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY03-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY03-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY03-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY03-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY03-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY03-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY03-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY03-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY03-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY03-NEXT:    store double [[ADD2]], double* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY03-NEXT:    [[TMP5:%.*]] = load double, double* [[A3]], align 4
// SIMD-ONLY03-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY03-NEXT:    store double [[INC]], double* [[A3]], align 4
// SIMD-ONLY03-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY03-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]]
// SIMD-ONLY03-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// SIMD-ONLY03-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY03-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY03-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]]
// SIMD-ONLY03-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1
// SIMD-ONLY03-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY03-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY03-NEXT:    [[TMP9:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY03-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY03-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY03-NEXT:    call void @llvm.stackrestore(i8* [[TMP10]])
// SIMD-ONLY03-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY03-NEXT:  entry:
// SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY03-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY03-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY03-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY03-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY03-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY03-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY03-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY03-NEXT:  entry:
// SIMD-ONLY03-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY03-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY03-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY03-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY03-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY03-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY03-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY03-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY03-NEXT:    ret i32 [[TMP2]]
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK-NEXT:  entry:
// TCHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// TCHECK-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// TCHECK-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// TCHECK-NEXT:    ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT:  entry:
// TCHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// TCHECK-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// TCHECK-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// TCHECK-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// TCHECK-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// TCHECK-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// TCHECK-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// TCHECK-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// TCHECK-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// TCHECK-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// TCHECK-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// TCHECK-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// TCHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// TCHECK-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// TCHECK-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// TCHECK-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// TCHECK-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// TCHECK-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// TCHECK-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// TCHECK-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// TCHECK-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// TCHECK-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// TCHECK-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// TCHECK-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// TCHECK-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// TCHECK-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// TCHECK-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// TCHECK-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// TCHECK-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// TCHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// TCHECK-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// TCHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// TCHECK-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK-NEXT:    store i64 1, i64* [[X]], align 8
// TCHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK-NEXT:    store i8 1, i8* [[Y]], align 8
// TCHECK-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK-NEXT:    ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT:  entry:
// TCHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// TCHECK-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// TCHECK-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// TCHECK-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// TCHECK-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// TCHECK-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
// TCHECK-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// TCHECK-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
// TCHECK-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
// TCHECK-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 8
// TCHECK-NEXT:    ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT:  entry:
// TCHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// TCHECK-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// TCHECK-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// TCHECK-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// TCHECK-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// TCHECK-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// TCHECK-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// TCHECK-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// TCHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// TCHECK-NEXT:    ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT:  entry:
// TCHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// TCHECK-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// TCHECK-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// TCHECK-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// TCHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// TCHECK-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// TCHECK-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// TCHECK-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// TCHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// TCHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK-NEXT:    store double [[ADD]], double* [[A]], align 8
// TCHECK-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// TCHECK-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK-NEXT:    store double [[INC]], double* [[A5]], align 8
// TCHECK-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// TCHECK-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// TCHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// TCHECK-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// TCHECK-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK-NEXT:    ret void
//
//
// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK-NEXT:  entry:
// TCHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// TCHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK-NEXT:    ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK1-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK1-NEXT:  entry:
// TCHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 8
// TCHECK1-NEXT:    [[GA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[GA]], i64* [[GA_ADDR]], align 8
// TCHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
// TCHECK1-NEXT:    ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK1-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT:  entry:
// TCHECK1-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
// TCHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 8
// TCHECK1-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
// TCHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[VLA_ADDR4:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 8
// TCHECK1-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
// TCHECK1-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// TCHECK1-NEXT:    store i64 [[AA]], i64* [[AA_ADDR]], align 8
// TCHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK1-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 8
// TCHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK1-NEXT:    store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
// TCHECK1-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 8
// TCHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
// TCHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
// TCHECK1-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
// TCHECK1-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK1-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
// TCHECK1-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK1-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
// TCHECK1-NEXT:    [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
// TCHECK1-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK1-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
// TCHECK1-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK1-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
// TCHECK1-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK1-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
// TCHECK1-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-NEXT:    [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
// TCHECK1-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// TCHECK1-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
// TCHECK1-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
// TCHECK1-NEXT:    [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
// TCHECK1-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK1-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
// TCHECK1-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK1-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
// TCHECK1-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK1-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK1-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK1-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
// TCHECK1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
// TCHECK1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
// TCHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
// TCHECK1-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK1-NEXT:    [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
// TCHECK1-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
// TCHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
// TCHECK1-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK1-NEXT:    store i64 1, i64* [[X]], align 8
// TCHECK1-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK1-NEXT:    store i8 1, i8* [[Y]], align 8
// TCHECK1-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK1-NEXT:    ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK1-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT:  entry:
// TCHECK1-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// TCHECK1-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
// TCHECK1-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// TCHECK1-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
// TCHECK1-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK1-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
// TCHECK1-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 8
// TCHECK1-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// TCHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
// TCHECK1-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
// TCHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK1-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 8
// TCHECK1-NEXT:    ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT:  entry:
// TCHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[AAA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK1-NEXT:    [[B2:%.*]] = alloca [10 x i32], align 4
// TCHECK1-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
// TCHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
// TCHECK1-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
// TCHECK1-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK1-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
// TCHECK1-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// TCHECK1-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
// TCHECK1-NEXT:    store i8 [[CONV5]], i8* [[CONV1]], align 1
// TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
// TCHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK1-NEXT:    store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
// TCHECK1-NEXT:    ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT:  entry:
// TCHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[VLA_ADDR2:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 8
// TCHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// TCHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[B]], i64* [[B_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
// TCHECK1-NEXT:    store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
// TCHECK1-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// TCHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
// TCHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
// TCHECK1-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK1-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
// TCHECK1-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-NEXT:    [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
// TCHECK1-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// TCHECK1-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
// TCHECK1-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
// TCHECK1-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
// TCHECK1-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK1-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
// TCHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-NEXT:    [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK1-NEXT:    [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
// TCHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK1-NEXT:    store double [[ADD]], double* [[A]], align 8
// TCHECK1-NEXT:    [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK1-NEXT:    [[TMP11:%.*]] = load double, double* [[A5]], align 8
// TCHECK1-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK1-NEXT:    store double [[INC]], double* [[A5]], align 8
// TCHECK1-NEXT:    [[CONV6:%.*]] = fptosi double [[INC]] to i16
// TCHECK1-NEXT:    [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
// TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
// TCHECK1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// TCHECK1-NEXT:    store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
// TCHECK1-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// TCHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK1-NEXT:    ret void
//
//
// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK1-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK1-NEXT:  entry:
// TCHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// TCHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// TCHECK1-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK1-NEXT:    store i64 [[A]], i64* [[A_ADDR]], align 8
// TCHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
// TCHECK1-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// TCHECK1-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK1-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
// TCHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
// TCHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK1-NEXT:    store i32 [[ADD]], i32* [[CONV]], align 4
// TCHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
// TCHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK1-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK1-NEXT:    ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK2-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK2-NEXT:  entry:
// TCHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// TCHECK2-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// TCHECK2-NEXT:    ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK2-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT:  entry:
// TCHECK2-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// TCHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// TCHECK2-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// TCHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// TCHECK2-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// TCHECK2-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK2-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// TCHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK2-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// TCHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK2-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// TCHECK2-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// TCHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// TCHECK2-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// TCHECK2-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// TCHECK2-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK2-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// TCHECK2-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK2-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// TCHECK2-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK2-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK2-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK2-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK2-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// TCHECK2-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK2-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// TCHECK2-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// TCHECK2-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// TCHECK2-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// TCHECK2-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK2-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// TCHECK2-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK2-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// TCHECK2-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK2-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// TCHECK2-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK2-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK2-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK2-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// TCHECK2-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK2-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// TCHECK2-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK2-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// TCHECK2-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// TCHECK2-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK2-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK2-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// TCHECK2-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// TCHECK2-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK2-NEXT:    store i64 1, i64* [[X]], align 4
// TCHECK2-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK2-NEXT:    store i8 1, i8* [[Y]], align 4
// TCHECK2-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK2-NEXT:    ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK2-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT:  entry:
// TCHECK2-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// TCHECK2-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// TCHECK2-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// TCHECK2-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// TCHECK2-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK2-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
// TCHECK2-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// TCHECK2-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
// TCHECK2-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
// TCHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK2-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 4
// TCHECK2-NEXT:    ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT:  entry:
// TCHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// TCHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// TCHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK2-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// TCHECK2-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// TCHECK2-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// TCHECK2-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK2-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// TCHECK2-NEXT:    ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK2-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT:  entry:
// TCHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// TCHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK2-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK2-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK2-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK2-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// TCHECK2-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK2-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK2-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// TCHECK2-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK2-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK2-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK2-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// TCHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// TCHECK2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK2-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK2-NEXT:    store double [[ADD]], double* [[A]], align 4
// TCHECK2-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK2-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// TCHECK2-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK2-NEXT:    store double [[INC]], double* [[A4]], align 4
// TCHECK2-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK2-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// TCHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// TCHECK2-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// TCHECK2-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK2-NEXT:    ret void
//
//
// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK2-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK2-NEXT:  entry:
// TCHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK2-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK2-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK2-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK2-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK2-NEXT:    ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
// TCHECK3-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
// TCHECK3-NEXT:  entry:
// TCHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[P_ADDR:%.*]] = alloca i32*, align 4
// TCHECK3-NEXT:    [[GA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    store i32* [[P]], i32** [[P_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[GA]], i32* [[GA_ADDR]], align 4
// TCHECK3-NEXT:    ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
// TCHECK3-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT:  entry:
// TCHECK3-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
// TCHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[BN_ADDR:%.*]] = alloca float*, align 4
// TCHECK3-NEXT:    [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
// TCHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[VLA_ADDR4:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[CN_ADDR:%.*]] = alloca double*, align 4
// TCHECK3-NEXT:    [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
// TCHECK3-NEXT:    [[B5:%.*]] = alloca [10 x float], align 4
// TCHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[C7:%.*]] = alloca [5 x [10 x double]], align 8
// TCHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[__VLA_EXPR2:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// TCHECK3-NEXT:    store i32 [[AA]], i32* [[AA_ADDR]], align 4
// TCHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK3-NEXT:    store float* [[BN]], float** [[BN_ADDR]], align 4
// TCHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK3-NEXT:    store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
// TCHECK3-NEXT:    store double* [[CN]], double** [[CN_ADDR]], align 4
// TCHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
// TCHECK3-NEXT:    [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
// TCHECK3-NEXT:    [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
// TCHECK3-NEXT:    [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
// TCHECK3-NEXT:    [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
// TCHECK3-NEXT:    [[TMP10:%.*]] = call i8* @llvm.stacksave()
// TCHECK3-NEXT:    store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
// TCHECK3-NEXT:    [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
// TCHECK3-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK3-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
// TCHECK3-NEXT:    [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
// TCHECK3-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
// TCHECK3-NEXT:    [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
// TCHECK3-NEXT:    [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
// TCHECK3-NEXT:    [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-NEXT:    [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
// TCHECK3-NEXT:    store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
// TCHECK3-NEXT:    store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
// TCHECK3-NEXT:    [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
// TCHECK3-NEXT:    [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
// TCHECK3-NEXT:    [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
// TCHECK3-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
// TCHECK3-NEXT:    [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
// TCHECK3-NEXT:    [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
// TCHECK3-NEXT:    [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
// TCHECK3-NEXT:    [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
// TCHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
// TCHECK3-NEXT:    [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
// TCHECK3-NEXT:    store i16 [[CONV11]], i16* [[CONV]], align 2
// TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
// TCHECK3-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// TCHECK3-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
// TCHECK3-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
// TCHECK3-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
// TCHECK3-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
// TCHECK3-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
// TCHECK3-NEXT:    [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
// TCHECK3-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
// TCHECK3-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
// TCHECK3-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
// TCHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
// TCHECK3-NEXT:    store i64 1, i64* [[X]], align 4
// TCHECK3-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
// TCHECK3-NEXT:    store i8 1, i8* [[Y]], align 4
// TCHECK3-NEXT:    [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP25]])
// TCHECK3-NEXT:    ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
// TCHECK3-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT:  entry:
// TCHECK3-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// TCHECK3-NEXT:    [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
// TCHECK3-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// TCHECK3-NEXT:    store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
// TCHECK3-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
// TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
// TCHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
// TCHECK3-NEXT:    [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
// TCHECK3-NEXT:    store double [[CONV]], double* [[ARRAYIDX]], align 4
// TCHECK3-NEXT:    [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// TCHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
// TCHECK3-NEXT:    [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
// TCHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
// TCHECK3-NEXT:    store double [[INC]], double* [[ARRAYIDX1]], align 4
// TCHECK3-NEXT:    ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
// TCHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT:  entry:
// TCHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[AAA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
// TCHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-NEXT:    [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
// TCHECK3-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK3-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
// TCHECK3-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
// TCHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
// TCHECK3-NEXT:    [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
// TCHECK3-NEXT:    store i8 [[CONV4]], i8* [[CONV]], align 1
// TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
// TCHECK3-NEXT:    store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
// TCHECK3-NEXT:    ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
// TCHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT:  entry:
// TCHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[VLA_ADDR2:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i16*, align 4
// TCHECK3-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// TCHECK3-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[B]], i32* [[B_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
// TCHECK3-NEXT:    store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
// TCHECK3-NEXT:    store i16* [[C]], i16** [[C_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
// TCHECK3-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP4:%.*]] = call i8* @llvm.stacksave()
// TCHECK3-NEXT:    store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
// TCHECK3-NEXT:    [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-NEXT:    [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
// TCHECK3-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// TCHECK3-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// TCHECK3-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
// TCHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
// TCHECK3-NEXT:    [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
// TCHECK3-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
// TCHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
// TCHECK3-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
// TCHECK3-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// TCHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK3-NEXT:    store double [[ADD]], double* [[A]], align 4
// TCHECK3-NEXT:    [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
// TCHECK3-NEXT:    [[TMP11:%.*]] = load double, double* [[A4]], align 4
// TCHECK3-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// TCHECK3-NEXT:    store double [[INC]], double* [[A4]], align 4
// TCHECK3-NEXT:    [[CONV5:%.*]] = fptosi double [[INC]] to i16
// TCHECK3-NEXT:    [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
// TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
// TCHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// TCHECK3-NEXT:    store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
// TCHECK3-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// TCHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// TCHECK3-NEXT:    ret void
//
//
// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
// TCHECK3-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// TCHECK3-NEXT:  entry:
// TCHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
// TCHECK3-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// TCHECK3-NEXT:    [[B1:%.*]] = alloca [10 x i32], align 4
// TCHECK3-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// TCHECK3-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
// TCHECK3-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
// TCHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
// TCHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// TCHECK3-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
// TCHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
// TCHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// TCHECK3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
// TCHECK3-NEXT:    store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
// TCHECK3-NEXT:    ret void
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY1-NEXT:  entry:
// SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY1-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY1-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY1-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY1-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY1-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY1-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY1-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY1-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY1-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// SIMD-ONLY1-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// SIMD-ONLY1-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// SIMD-ONLY1-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY1-NEXT:    [[TMP8:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY1-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY1-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2
// SIMD-ONLY1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3
// SIMD-ONLY1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1
// SIMD-ONLY1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY1-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY1-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3
// SIMD-ONLY1-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY1-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY1-NEXT:    store i64 1, i64* [[X8]], align 8
// SIMD-ONLY1-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY1-NEXT:    store i8 1, i8* [[Y9]], align 8
// SIMD-ONLY1-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY1-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY1-NEXT:    [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0
// SIMD-ONLY1-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 8
// SIMD-ONLY1-NEXT:    [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0
// SIMD-ONLY1-NEXT:    [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8
// SIMD-ONLY1-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY1-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 8
// SIMD-ONLY1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT:    call void @llvm.stackrestore(i8* [[TMP15]])
// SIMD-ONLY1-NEXT:    ret i32 [[TMP14]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY1-NEXT:  entry:
// SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY1-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY1-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY1-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY1-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY1-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY1-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY1-NEXT:  entry:
// SIMD-ONLY1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY1-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY1-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY1-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY1-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY1-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY1-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY1-NEXT:    store double [[ADD2]], double* [[A]], align 8
// SIMD-ONLY1-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY1-NEXT:    [[TMP6:%.*]] = load double, double* [[A3]], align 8
// SIMD-ONLY1-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY1-NEXT:    store double [[INC]], double* [[A3]], align 8
// SIMD-ONLY1-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY1-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]]
// SIMD-ONLY1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// SIMD-ONLY1-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY1-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]]
// SIMD-ONLY1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1
// SIMD-ONLY1-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY1-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY1-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY1-NEXT:    call void @llvm.stackrestore(i8* [[TMP11]])
// SIMD-ONLY1-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY1-NEXT:  entry:
// SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY1-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY1-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY1-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY1-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY1-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY1-NEXT:  entry:
// SIMD-ONLY1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY1-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY1-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY1-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY1-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY1-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY1-NEXT:    ret i32 [[TMP2]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY11-NEXT:  entry:
// SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY11-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY11-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY11-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY11-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
// SIMD-ONLY11-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
// SIMD-ONLY11-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY11-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY11-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
// SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY11-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT:    [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
// SIMD-ONLY11-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
// SIMD-ONLY11-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
// SIMD-ONLY11-NEXT:    store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
// SIMD-ONLY11-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY11-NEXT:    [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    store i32 [[TMP6]], i32* [[X]], align 4
// SIMD-ONLY11-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY11-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    store i32 [[TMP7]], i32* [[Y]], align 4
// SIMD-ONLY11-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY11-NEXT:    [[TMP8:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY11-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY11-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY11-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2
// SIMD-ONLY11-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3
// SIMD-ONLY11-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY11-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1
// SIMD-ONLY11-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2
// SIMD-ONLY11-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY11-NEXT:    [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
// SIMD-ONLY11-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]]
// SIMD-ONLY11-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3
// SIMD-ONLY11-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY11-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY11-NEXT:    store i64 1, i64* [[X8]], align 8
// SIMD-ONLY11-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY11-NEXT:    store i8 1, i8* [[Y9]], align 8
// SIMD-ONLY11-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY11-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
// SIMD-ONLY11-NEXT:    [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0
// SIMD-ONLY11-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 8
// SIMD-ONLY11-NEXT:    [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0
// SIMD-ONLY11-NEXT:    [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8
// SIMD-ONLY11-NEXT:    [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
// SIMD-ONLY11-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 8
// SIMD-ONLY11-NEXT:    [[TMP14:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT:    call void @llvm.stackrestore(i8* [[TMP15]])
// SIMD-ONLY11-NEXT:    ret i32 [[TMP14]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY11-NEXT:  entry:
// SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 8
// SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
// SIMD-ONLY11-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
// SIMD-ONLY11-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY11-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
// SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY11-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
// SIMD-ONLY11-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY11-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
// SIMD-ONLY11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY11-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY11-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY11-NEXT:  entry:
// SIMD-ONLY11-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
// SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
// SIMD-ONLY11-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
// SIMD-ONLY11-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY11-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
// SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY11-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY11-NEXT:    store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT:    [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
// SIMD-ONLY11-NEXT:    [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
// SIMD-ONLY11-NEXT:    store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
// SIMD-ONLY11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY11-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// SIMD-ONLY11-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY11-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY11-NEXT:    store double [[ADD2]], double* [[A]], align 8
// SIMD-ONLY11-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY11-NEXT:    [[TMP6:%.*]] = load double, double* [[A3]], align 8
// SIMD-ONLY11-NEXT:    [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
// SIMD-ONLY11-NEXT:    store double [[INC]], double* [[A3]], align 8
// SIMD-ONLY11-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY11-NEXT:    [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]]
// SIMD-ONLY11-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
// SIMD-ONLY11-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY11-NEXT:    [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
// SIMD-ONLY11-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]]
// SIMD-ONLY11-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1
// SIMD-ONLY11-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY11-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
// SIMD-ONLY11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY11-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
// SIMD-ONLY11-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
// SIMD-ONLY11-NEXT:    call void @llvm.stackrestore(i8* [[TMP11]])
// SIMD-ONLY11-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY11-NEXT:  entry:
// SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY11-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY11-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY11-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY11-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY11-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY11-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY11-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY11-NEXT:  entry:
// SIMD-ONLY11-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY11-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY11-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY11-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY11-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2
// SIMD-ONLY11-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY11-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY11-NEXT:    ret i32 [[TMP2]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY12-NEXT:  entry:
// SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY12-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY12-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY12-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY12-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY12-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY12-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY12-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY12-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY12-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY12-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// SIMD-ONLY12-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// SIMD-ONLY12-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// SIMD-ONLY12-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY12-NEXT:    [[TMP6:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY12-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY12-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY12-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2
// SIMD-ONLY12-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3
// SIMD-ONLY12-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY12-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1
// SIMD-ONLY12-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY12-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY12-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY12-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY12-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3
// SIMD-ONLY12-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY12-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY12-NEXT:    store i64 1, i64* [[X8]], align 4
// SIMD-ONLY12-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY12-NEXT:    store i8 1, i8* [[Y9]], align 4
// SIMD-ONLY12-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY12-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY12-NEXT:    [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0
// SIMD-ONLY12-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 4
// SIMD-ONLY12-NEXT:    [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0
// SIMD-ONLY12-NEXT:    [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4
// SIMD-ONLY12-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY12-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 4
// SIMD-ONLY12-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// SIMD-ONLY12-NEXT:    ret i32 [[TMP12]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY12-NEXT:  entry:
// SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY12-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY12-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY12-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY12-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY12-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY12-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY12-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY12-NEXT:  entry:
// SIMD-ONLY12-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY12-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY12-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY12-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY12-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY12-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY12-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY12-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY12-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY12-NEXT:    store double [[ADD2]], double* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY12-NEXT:    [[TMP5:%.*]] = load double, double* [[A3]], align 4
// SIMD-ONLY12-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY12-NEXT:    store double [[INC]], double* [[A3]], align 4
// SIMD-ONLY12-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY12-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]]
// SIMD-ONLY12-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// SIMD-ONLY12-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY12-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY12-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]]
// SIMD-ONLY12-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1
// SIMD-ONLY12-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY12-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY12-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY12-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY12-NEXT:    call void @llvm.stackrestore(i8* [[TMP10]])
// SIMD-ONLY12-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY12-NEXT:  entry:
// SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY12-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY12-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY12-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY12-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY12-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY12-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY12-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY12-NEXT:  entry:
// SIMD-ONLY12-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY12-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY12-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY12-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY12-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY12-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY12-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY12-NEXT:    ret i32 [[TMP2]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3fooiPd
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// SIMD-ONLY13-NEXT:  entry:
// SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[AA:%.*]] = alloca i16, align 2
// SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca [10 x float], align 4
// SIMD-ONLY13-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY13-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
// SIMD-ONLY13-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
// SIMD-ONLY13-NEXT:    [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
// SIMD-ONLY13-NEXT:    [[P:%.*]] = alloca i32*, align 64
// SIMD-ONLY13-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    store i16 0, i16* [[AA]], align 2
// SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY13-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT:    [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
// SIMD-ONLY13-NEXT:    store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
// SIMD-ONLY13-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
// SIMD-ONLY13-NEXT:    store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
// SIMD-ONLY13-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 [[TMP4]], i32* [[X]], align 4
// SIMD-ONLY13-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
// SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 [[TMP5]], i32* [[Y]], align 4
// SIMD-ONLY13-NEXT:    store i32* [[A]], i32** [[P]], align 64
// SIMD-ONLY13-NEXT:    [[TMP6:%.*]] = load i16, i16* [[AA]], align 2
// SIMD-ONLY13-NEXT:    [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY13-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
// SIMD-ONLY13-NEXT:    store i16 [[CONV2]], i16* [[AA]], align 2
// SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2
// SIMD-ONLY13-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3
// SIMD-ONLY13-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
// SIMD-ONLY13-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1
// SIMD-ONLY13-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2
// SIMD-ONLY13-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
// SIMD-ONLY13-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]]
// SIMD-ONLY13-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]]
// SIMD-ONLY13-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3
// SIMD-ONLY13-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
// SIMD-ONLY13-NEXT:    [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
// SIMD-ONLY13-NEXT:    store i64 1, i64* [[X8]], align 4
// SIMD-ONLY13-NEXT:    [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
// SIMD-ONLY13-NEXT:    store i8 1, i8* [[Y9]], align 4
// SIMD-ONLY13-NEXT:    [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
// SIMD-ONLY13-NEXT:    [[TMP8:%.*]] = load i32, i32* [[X10]], align 4
// SIMD-ONLY13-NEXT:    [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double
// SIMD-ONLY13-NEXT:    [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0
// SIMD-ONLY13-NEXT:    store double [[CONV11]], double* [[ARRAYIDX12]], align 4
// SIMD-ONLY13-NEXT:    [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0
// SIMD-ONLY13-NEXT:    [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4
// SIMD-ONLY13-NEXT:    [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
// SIMD-ONLY13-NEXT:    store double [[INC]], double* [[ARRAYIDX13]], align 4
// SIMD-ONLY13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT:    call void @llvm.stackrestore(i8* [[TMP13]])
// SIMD-ONLY13-NEXT:    ret i32 [[TMP12]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3bariPd
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
// SIMD-ONLY13-NEXT:  entry:
// SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[PTR_ADDR:%.*]] = alloca double*, align 4
// SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
// SIMD-ONLY13-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store double* [[PTR]], double** [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
// SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
// SIMD-ONLY13-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
// SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
// SIMD-ONLY13-NEXT:    store i32 [[ADD2]], i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
// SIMD-ONLY13-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
// SIMD-ONLY13-NEXT:    store i32 [[ADD4]], i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
// SIMD-ONLY13-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
// SIMD-ONLY13-NEXT:    store i32 [[ADD6]], i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    ret i32 [[TMP9]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
// SIMD-ONLY13-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
// SIMD-ONLY13-NEXT:  entry:
// SIMD-ONLY13-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
// SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 4
// SIMD-ONLY13-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY13-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
// SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave()
// SIMD-ONLY13-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
// SIMD-ONLY13-NEXT:    [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
// SIMD-ONLY13-NEXT:    store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
// SIMD-ONLY13-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY13-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP4]] to double
// SIMD-ONLY13-NEXT:    [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
// SIMD-ONLY13-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY13-NEXT:    store double [[ADD2]], double* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
// SIMD-ONLY13-NEXT:    [[TMP5:%.*]] = load double, double* [[A3]], align 4
// SIMD-ONLY13-NEXT:    [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00
// SIMD-ONLY13-NEXT:    store double [[INC]], double* [[A3]], align 4
// SIMD-ONLY13-NEXT:    [[CONV4:%.*]] = fptosi double [[INC]] to i16
// SIMD-ONLY13-NEXT:    [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]]
// SIMD-ONLY13-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
// SIMD-ONLY13-NEXT:    store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
// SIMD-ONLY13-NEXT:    [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]]
// SIMD-ONLY13-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]]
// SIMD-ONLY13-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1
// SIMD-ONLY13-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
// SIMD-ONLY13-NEXT:    [[CONV8:%.*]] = sext i16 [[TMP8]] to i32
// SIMD-ONLY13-NEXT:    [[TMP9:%.*]] = load i32, i32* [[B]], align 4
// SIMD-ONLY13-NEXT:    [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
// SIMD-ONLY13-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
// SIMD-ONLY13-NEXT:    call void @llvm.stackrestore(i8* [[TMP10]])
// SIMD-ONLY13-NEXT:    ret i32 [[ADD9]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZL7fstatici
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
// SIMD-ONLY13-NEXT:  entry:
// SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[AAA:%.*]] = alloca i8, align 1
// SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY13-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    store i8 0, i8* [[AAA]], align 1
// SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY13-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1
// SIMD-ONLY13-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// SIMD-ONLY13-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// SIMD-ONLY13-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8
// SIMD-ONLY13-NEXT:    store i8 [[CONV2]], i8* [[AAA]], align 1
// SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
// SIMD-ONLY13-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    ret i32 [[TMP3]]
//
//
// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
// SIMD-ONLY13-NEXT:  entry:
// SIMD-ONLY13-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[A:%.*]] = alloca i32, align 4
// SIMD-ONLY13-NEXT:    [[B:%.*]] = alloca [10 x i32], align 4
// SIMD-ONLY13-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
// SIMD-ONLY13-NEXT:    store i32 0, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
// SIMD-ONLY13-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
// SIMD-ONLY13-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
// SIMD-ONLY13-NEXT:    store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
// SIMD-ONLY13-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
// SIMD-ONLY13-NEXT:    ret i32 [[TMP2]]
//
